From cb2a623faa78b02b93b402f665030f6b75cdf0a9 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 30 Oct 2023 16:38:15 +0100 Subject: Added full SSE 4.2 support --- src/emu/x64compstrings.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++ src/emu/x64compstrings.h | 13 +++++ src/emu/x64run660f.c | 60 ++++++++++++++++++++++++ src/emu/x64run670f.c | 1 + src/emu/x64runf20f.c | 38 +++++++++++++++ 5 files changed, 232 insertions(+) create mode 100644 src/emu/x64compstrings.c create mode 100644 src/emu/x64compstrings.h (limited to 'src/emu') diff --git a/src/emu/x64compstrings.c b/src/emu/x64compstrings.c new file mode 100644 index 00000000..72781ebb --- /dev/null +++ b/src/emu/x64compstrings.c @@ -0,0 +1,120 @@ +#include + +#include "box64stack.h" +#include "x64emu.h" +#include "x64run_private.h" +#include "x64emu_private.h" +#include "x64compstrings.h" + +static int overrideIfDataInvalid(sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, int j, int i, int imm8) +{ + int valid1 = (i>2)&3) { + case 0b00: + case 0b01: return 0; + case 0b10: + case 0b11: return 1; + } + if(!valid1 && valid2) + switch((imm8>>2)&3) { + case 0b00: + case 0b01: + case 0b10: return 0; + case 0b11: return 1; + } + if(valid1 && !valid2) + return 0; + switch((imm8>>2)&3) { + case 0b01: // range + switch (imm8&3) { + case 0b00: // ub + return (i&1)?((reg->ub[i]>=mem->ub[j])):((reg->ub[i]<=mem->ub[j])); + case 0b01: // uw + return (i&1)?((reg->uw[i]>=mem->uw[j])):((reg->uw[i]<=mem->uw[j])); + case 0b10: // sb + return (i&1)?((reg->sb[i]>=mem->sb[j])):((reg->sb[i]<=mem->sb[j])); + case 0b11: // sw + return (i&1)?((reg->sw[i]>=mem->sw[j])):((reg->sw[i]<=mem->sw[j])); + } + break; + default: // the others + switch (imm8&1) { + case 0: // byte + return (reg->ub[i] == mem->ub[j]); + case 1: // word + return (reg->uw[i] == mem->uw[j]); + } + } +} + +uint32_t sse42_compare_string_explicit_len(x64emu_t* emu, sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, uint8_t imm8) +{ + // get number of 
packed byte/word + int n_packed = (imm8&1)?8:16; + if(lreg<0) lreg = -lreg; + if(lmem<0) lmem = -lmem; + if(lreg>n_packed) lreg = n_packed; + if(lmem>n_packed) lmem = n_packed; + // aggregate to intres1 + uint32_t intres1 = 0; + switch((imm8>>2)&3) { + case 0b00: //Equal any + for(int j=0; j>4)&3) { + case 0b01: intres2 ^= ((1<uw[lmem]) ++lmem; + while(lreg<8 && reg->uw[lreg]) ++lreg; + } else { + while(lmem<16 && mem->ub[lmem]) ++lmem; + while(lreg<16 && reg->ub[lreg]) ++lreg; + } + return sse42_compare_string_explicit_len(emu, mem, lmem, reg, lreg, imm8); +} \ No newline at end of file diff --git a/src/emu/x64compstrings.h b/src/emu/x64compstrings.h new file mode 100644 index 00000000..b2a785b2 --- /dev/null +++ b/src/emu/x64compstrings.h @@ -0,0 +1,13 @@ +#ifndef __X64_CMPSTRINGS_H__ +#define __X64_CMPSTRINGS_H__ + +#include + +#include "regs.h" + +typedef struct x64emu_s x64emu_t; + +uint32_t sse42_compare_string_explicit_len(x64emu_t* emu, sse_regs_t* a, int la, sse_regs_t* b, int lb, uint8_t imm8); +uint32_t sse42_compare_string_implicit_len(x64emu_t* emu, sse_regs_t* a, sse_regs_t* b, uint8_t imm8); + +#endif //__X64_CMPSTRINGS_H__ \ No newline at end of file diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index 9b969c83..72180c0c 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -22,6 +22,7 @@ #include "bridge.h" #include "modrm.h" +#include "x64compstrings.h" static uint8_t ff_mult(uint8_t a, uint8_t b) { @@ -1118,6 +1119,65 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) } break; + case 0x60: /* PCMPESTRM */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u); + if(tmp8u&0b1000000) { + switch(tmp8u&1) { + case 0: for(int i=0; i<16; ++i) GX->ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break; + case 1: for(int i=0; i<8; ++i) GX->uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break; + } + } else { + GX->q[1] = GX->q[0] = 0; + GX->uw[0] = tmp32u; + } + break; + 
case 0x61: /* PCMPESTRI */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u); + if(!tmp32u) + R_RCX = (tmp8u&1)?8:16; + else if(tmp8u&0b1000000) + R_RCX = 31-__builtin_clz(tmp32u); + else + R_RCX = __builtin_ffs(tmp32u) - 1; + break; + case 0x62: /* PCMPISTRM (implicit-length variant; NOTE(review): Intel SDM returns this mask in XMM0, the code below stores it in GX - confirm) */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u); + if(tmp8u&0b1000000) { + switch(tmp8u&1) { + case 0: for(int i=0; i<16; ++i) GX->ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break; + case 1: for(int i=0; i<8; ++i) GX->uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break; + } + } else { + GX->q[1] = GX->q[0] = 0; + GX->uw[0] = tmp32u; + } + break; + case 0x63: /* PCMPISTRI */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u); + if(!tmp32u) + R_RCX = (tmp8u&1)?8:16; + else if(tmp8u&0b1000000) + R_RCX = 31-__builtin_clz(tmp32u); + else + R_RCX = __builtin_ffs(tmp32u) - 1; + break; + case 0xDF: // AESKEYGENASSIST Gx, Ex, u8 nextop = F8; GETEX(1); diff --git a/src/emu/x64run670f.c b/src/emu/x64run670f.c index 015b3712..2f94bd33 100644 --- a/src/emu/x64run670f.c +++ b/src/emu/x64run670f.c @@ -25,6 +25,7 @@ #endif #include "modrm.h" +#include "x64compstrings.h" #ifdef TEST_INTERPRETER uintptr_t Test670F(x64test_t *test, rex_t rex, int rep, uintptr_t addr) diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c index d5ce598f..020a896a 100644 --- a/src/emu/x64runf20f.c +++ b/src/emu/x64runf20f.c @@ -156,6 +156,44 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) GD->dword[1] = 0; } break; + + case 0x38: // more opcodes + opcode = F8; + switch(opcode) { + + case 0xF0: // CRC32 Gd, Eb + nextop = F8; + GETEB(0); + GETGD; + GD->dword[0] ^= EB->byte[0]; + for (int i = 0; i < 8; i++) { + if (GD->dword[0] & 1) + GD->dword[0] = (GD->dword[0] >> 1) ^ 0x82f63b78; + else + GD->dword[0] = (GD->dword[0] >> 1); + 
} + GD->dword[1] = 0; + break; + case 0xF1: // CRC32 Gd, Ed + nextop = F8; + GETED(0); + GETGD; + for(int j=0; j<4*(rex.w+1); ++j) { + GD->dword[0] ^= ED->byte[j]; + for (int i = 0; i < 8; i++) { + if (GD->dword[0] & 1) + GD->dword[0] = (GD->dword[0] >> 1) ^ 0x82f63b78; + else + GD->dword[0] = (GD->dword[0] >> 1); + } + } + GD->dword[1] = 0; + break; + + default: + return 0; + } + break; case 0x51: /* SQRTSD Gx, Ex */ nextop = F8; -- cgit 1.4.1