diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-10-30 16:38:15 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-10-30 16:38:15 +0100 |
| commit | cb2a623faa78b02b93b402f665030f6b75cdf0a9 (patch) | |
| tree | 6b376883dc43953b876c2fd8fa12fce94b29eb1e /src | |
| parent | 7e80e7ba86c51487e71e95d52fb72f45c94030ba (diff) | |
| download | box64-cb2a623faa78b02b93b402f665030f6b75cdf0a9.tar.gz box64-cb2a623faa78b02b93b402f665030f6b75cdf0a9.zip | |
Added full SSE 4.2 support
Diffstat (limited to 'src')
| -rw-r--r-- | src/emu/x64compstrings.c | 120 | ||||
| -rw-r--r-- | src/emu/x64compstrings.h | 13 | ||||
| -rw-r--r-- | src/emu/x64run660f.c | 60 | ||||
| -rw-r--r-- | src/emu/x64run670f.c | 1 | ||||
| -rw-r--r-- | src/emu/x64runf20f.c | 38 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 3 | ||||
| -rw-r--r-- | src/wrapped/wrappedlibc.c | 2 |
7 files changed, 236 insertions, 1 deletions
diff --git a/src/emu/x64compstrings.c b/src/emu/x64compstrings.c
new file mode 100644
index 00000000..72781ebb
--- /dev/null
+++ b/src/emu/x64compstrings.c
@@ -0,0 +1,120 @@
+#include <stdint.h>
+
+#include "box64stack.h"
+#include "x64emu.h"
+#include "x64run_private.h"
+#include "x64emu_private.h"
+#include "x64compstrings.h"
+
+// Compare element i of reg with element j of mem for the aggregation selected by
+// imm8[3:2] (00 equal any, 01 range, 10 equal each, 11 equal ordered).
+// imm8[1:0] selects the element format. When i>=lreg or j>=lmem the element is
+// invalid and the result is forced instead of compared. Returns 0 or 1.
+static int overrideIfDataInvalid(sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, int j, int i, int imm8)
+{
+    int valid1 = (i<lreg);
+    int valid2 = (j<lmem);
+    int aggr = (imm8>>2)&3;
+    if(!valid1 && !valid2)
+        return (aggr>=0b10);    // both invalid: forced to 1 for equal each / equal ordered only
+    if(!valid1 && valid2)
+        return (aggr==0b11);    // only reg invalid: forced to 1 for equal ordered only
+    if(valid1 && !valid2)
+        return 0;               // only mem invalid: always 0
+    if(aggr==0b01) {    // range: even i holds a lower bound, odd i an upper bound
+        switch (imm8&3) {
+            case 0b00:  // ub
+                return (i&1)?((reg->ub[i]>=mem->ub[j])):((reg->ub[i]<=mem->ub[j]));
+            case 0b01:  // uw
+                return (i&1)?((reg->uw[i]>=mem->uw[j])):((reg->uw[i]<=mem->uw[j]));
+            case 0b10:  // sb
+                return (i&1)?((reg->sb[i]>=mem->sb[j])):((reg->sb[i]<=mem->sb[j]));
+            default:    // sw
+                return (i&1)?((reg->sw[i]>=mem->sw[j])):((reg->sw[i]<=mem->sw[j]));
+        }
+    }
+    // the others: plain equality on byte or word
+    if(imm8&1)
+        return (reg->uw[i] == mem->uw[j]);
+    return (reg->ub[i] == mem->ub[j]);
+}
+
+// Comparison core shared by the PCMPESTRx/PCMPISTRx opcodes: build the result bits
+// from mem (length lmem) and reg (length lreg) as directed by imm8, then set
+// CF/ZF/SF/OF and clear AF/PF. A negative length counts as its absolute value and
+// lengths are saturated to the number of packed elements. Returns the result bits.
+uint32_t sse42_compare_string_explicit_len(x64emu_t* emu, sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, uint8_t imm8)
+{
+    // get number of packed byte/word
+    int n_packed = (imm8&1)?8:16;
+    // saturate before negating, so that -INT_MIN is never evaluated
+    if(lreg<-n_packed || lreg>n_packed) lreg = n_packed;
+    if(lmem<-n_packed || lmem>n_packed) lmem = n_packed;
+    if(lreg<0) lreg = -lreg;
+    if(lmem<0) lmem = -lmem;
+    // aggregate to intres1
+    uint32_t intres1 = 0;
+    switch((imm8>>2)&3) {
+        case 0b00:  //Equal any
+            for(int j=0; j<n_packed; ++j)
+                for(int i=0; i<n_packed; ++i) {
+                    intres1 |= overrideIfDataInvalid(mem, lmem, reg, lreg, j, i, imm8)<<j;
+                }
+            break;
+        case 0b01:  // Range
+            for(int j=0; j<n_packed; ++j)
+                for(int i=0; i<n_packed; i+=2) {
+                    intres1 |= (overrideIfDataInvalid(mem, lmem, reg, lreg, j, i, imm8) & overrideIfDataInvalid(mem, lmem, reg, lreg, j, i+1, imm8))<<j;
+                }
+            break;
+        case 0b10:  // Equal each
+            for(int i=0; i<n_packed; ++i) {
+                intres1 |= overrideIfDataInvalid(mem, lmem, reg, lreg, i, i, imm8)<<i;
+            }
+            break;
+        case 0b11:  // Equal ordered
+            // start from all ones, bit j is cleared as soon as one comparison at offset j fails
+            intres1 = (1<<n_packed)-1;
+            for(int j=0; j<n_packed; ++j)
+                for(int i=0; i<n_packed-j; ++i) {
+                    int k = i+j;
+                    intres1 &= (((1<<n_packed)-1)^(1<<j)) | overrideIfDataInvalid(mem, lmem, reg, lreg, k, i, imm8)<<j;
+                }
+            break;
+    }
+    // build intres2 from the polarity bits imm8[5:4]; each case needs its own break,
+    // or 0b01 would fall through and apply the 0b11 mask as well
+    uint32_t intres2 = intres1;
+    switch((imm8>>4)&3) {
+        case 0b01: intres2 ^= ((1<<n_packed)-1); break; // invert every result bit
+        case 0b11: intres2 ^= ((1<<lmem)-1); break;     // invert only the bits below lmem
+    }
+    // and now set the flags
+    RESET_FLAGS(emu);
+    CONDITIONAL_SET_FLAG(intres2, F_CF);
+    CONDITIONAL_SET_FLAG(lmem<n_packed, F_ZF);
+    CONDITIONAL_SET_FLAG(lreg<n_packed, F_SF);
+    CONDITIONAL_SET_FLAG(intres2&1, F_OF);
+    CLEAR_FLAG(F_AF);
+    CLEAR_FLAG(F_PF);
+
+    return intres2;
+}
+
+// Implicit length variant: the length of each operand is the index of its first
+// null element (byte or word, depending on imm8 bit 0), or the full element count
+// when there is none. The comparison itself is done by the explicit length version.
+uint32_t sse42_compare_string_implicit_len(x64emu_t* emu, sse_regs_t* mem, sse_regs_t* reg, uint8_t imm8)
+{
+    int lmem = 0;
+    int lreg = 0;
+    // get lmem and lreg
+    if(imm8&1) {
+        while(lmem<8 && mem->uw[lmem]) ++lmem;
+        while(lreg<8 && reg->uw[lreg]) ++lreg;
+    } else {
+        while(lmem<16 && mem->ub[lmem]) ++lmem;
+        while(lreg<16 && reg->ub[lreg]) ++lreg;
+    }
+    return sse42_compare_string_explicit_len(emu, mem, lmem, reg, lreg, imm8);
+}
\ No newline at end of file
diff --git a/src/emu/x64compstrings.h b/src/emu/x64compstrings.h
new file mode 100644
index 00000000..b2a785b2
--- /dev/null
+++ b/src/emu/x64compstrings.h
@@ -0,0 +1,13 @@
+#ifndef __X64_CMPSTRINGS_H__
+#define __X64_CMPSTRINGS_H__
+
+#include <stdint.h>
+
+#include "regs.h"
+
+typedef struct x64emu_s x64emu_t;
+// SSE4.2 string compare helpers: both set the flags on emu and return the result bits
+uint32_t sse42_compare_string_explicit_len(x64emu_t* emu, sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, uint8_t imm8);
+uint32_t sse42_compare_string_implicit_len(x64emu_t* emu, sse_regs_t* mem, sse_regs_t* reg, uint8_t imm8);
+
+#endif //__X64_CMPSTRINGS_H__
\ No newline at end of file
diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index 9b969c83..72180c0c 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -22,6 +22,7 @@
 
 #include "bridge.h"
 #include "modrm.h"
+#include "x64compstrings.h"
 
 static uint8_t ff_mult(uint8_t a, uint8_t b)
 {
@@ -1118,6 +1119,65 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 }
                 break;
 
+            case 0x60:  /* PCMPESTRM */
+                nextop = F8;
+                GETEX(1);
+                GETGX;
+                tmp8u = F8;
+                tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u);
+                if(tmp8u&0b1000000) {   // imm8 bit 6: expand each result bit to a full byte/word
+                    switch(tmp8u&1) {   // the mask is written to XMM0, Gx is only a source
+                        case 0: for(int i=0; i<16; ++i) emu->xmm[0].ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break;
+                        case 1: for(int i=0; i<8; ++i) emu->xmm[0].uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break;
+                    }
+                } else {
+                    emu->xmm[0].q[1] = emu->xmm[0].q[0] = 0;
+                    emu->xmm[0].uw[0] = tmp32u;
+                }
+                break;
+            case 0x61:  /* PCMPESTRI */
+                nextop = F8;
+                GETEX(1);
+                GETGX;
+                tmp8u = F8;
+                tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u);
+                if(!tmp32u)
+                    R_RCX = (tmp8u&1)?8:16;     // no bit set: index is the element count
+                else if(tmp8u&0b1000000)
+                    R_RCX = 31-__builtin_clz(tmp32u);   // imm8 bit 6 set: highest set bit
+                else
+                    R_RCX = __builtin_ffs(tmp32u) - 1;  // else lowest set bit
+                break;
+            case 0x62:  /* PCMPISTRM */
+                nextop = F8;
+                GETEX(1);
+                GETGX;
+                tmp8u = F8;
+                tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u);
+                if(tmp8u&0b1000000) {   // imm8 bit 6: expand each result bit to a full byte/word
+                    switch(tmp8u&1) {   // the mask is written to XMM0, Gx is only a source
+                        case 0: for(int i=0; i<16; ++i) emu->xmm[0].ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break;
+                        case 1: for(int i=0; i<8; ++i) emu->xmm[0].uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break;
+                    }
+                } else {
+                    emu->xmm[0].q[1] = emu->xmm[0].q[0] = 0;
+                    emu->xmm[0].uw[0] = tmp32u;
+                }
+                break;
+            case 0x63:  /* PCMPISTRI */
+                nextop = F8;
+                GETEX(1);
+                GETGX;
+                tmp8u = F8;
+                tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u);
+                if(!tmp32u)
+                    R_RCX = (tmp8u&1)?8:16;     // no bit set: index is the element count
+                else if(tmp8u&0b1000000)
+                    R_RCX = 31-__builtin_clz(tmp32u);   // imm8 bit 6 set: highest set bit
+                else
+                    R_RCX = __builtin_ffs(tmp32u) - 1;  // else lowest set bit
+                break;
+
             case 0xDF:      // AESKEYGENASSIST Gx, Ex, u8
                 nextop = F8;
                 GETEX(1);
diff --git a/src/emu/x64run670f.c b/src/emu/x64run670f.c
index 015b3712..2f94bd33 100644
--- a/src/emu/x64run670f.c
+++ b/src/emu/x64run670f.c
@@ -25,6 +25,7 @@
 #endif
 
 #include "modrm.h"
+#include "x64compstrings.h"
 
 #ifdef TEST_INTERPRETER
 uintptr_t Test670F(x64test_t *test, rex_t rex, int rep, uintptr_t addr)
diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index d5ce598f..020a896a 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -156,6 +156,44 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             GD->dword[1] = 0;
         }
         break;
+
+    case 0x38:  // more opcodes
+        opcode = F8;
+        switch(opcode) {
+
+            case 0xF0:  // CRC32 Gd, Eb
+                nextop = F8;
+                GETEB(0);
+                GETGD;
+                GD->dword[0] ^= EB->byte[0];
+                for (int i = 0; i < 8; i++) {
+                    if (GD->dword[0] & 1)
+                        GD->dword[0] = (GD->dword[0] >> 1) ^ 0x82f63b78;
+                    else
+                        GD->dword[0] = (GD->dword[0] >> 1);
+                }
+                GD->dword[1] = 0;
+                break;
+            case 0xF1:  // CRC32 Gd, Ed
+                nextop = F8;
+                GETED(0);
+                GETGD;
+                for(int j=0; j<4*(rex.w+1); ++j) {
+                    GD->dword[0] ^= ED->byte[j];
+                    for (int i = 0; i < 8; i++) {
+                        if (GD->dword[0] & 1)
+                            GD->dword[0] = (GD->dword[0] >> 1) ^ 0x82f63b78;
+                        else
+                            GD->dword[0] = (GD->dword[0] >> 1);
+                    }
+                }
+                GD->dword[1] = 0;
+                break;
+
+            default:
+                return 0;
+        }
+        break;
 
     case 0x51:  /* SQRTSD Gx, Ex */
         nextop = F8;
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 6039f135..81399633 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -193,6 +193,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                 R_EAX |= cpu<<24;
             }*/
             R_EDX =   1         // fpu
+                    | 1<<2      // debugging extension
                     | 1<<4      // rdtsc
                     | 1<<8      // cmpxchg8
                     | 1<<11     // sep (sysenter & sysexit)
@@ -202,6 +203,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<24     // fxsr (fxsave, fxrestore)
                     | 1<<25     // SSE
                     | 1<<26     // SSE2
+                    | 1<<28     // HT / Multi-core
                     ;
             R_ECX =   1<<0      // SSE3
                     | 1<<1      // PCLMULQDQ
@@ -209,6 +211,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<12     // fma
                     | 1<<13     // cx16 (cmpxchg16)
                     | 1<<19     // SSE4_1
+                    | 1<<20     // SSE4_2
                     | 1<<22     // MOVBE
                     | 1<<23     // POPCOUNT
                     | 1<<25     // aesni
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index fbbbef76..b9ed9c42 100644
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1562,7 +1562,7 @@ void CreateCPUInfoFile(int fd)
     P;
     sprintf(buff, "bogomips\t: %g\n", getBogoMips());
     P;
-    sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1 lzcnt popcnt\n");
+    sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1 sse4_2 lzcnt popcnt\n");
     P;
     sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n");
     P;