diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-29 10:53:38 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-29 10:53:38 +0200 |
| commit | af05d7439fa6866f900c7e571812ba3f970bd72b (patch) | |
| tree | 1943aa5fd7d3ee4df00f8d4cc6a39e054004f2f3 /src | |
| parent | 6e22f4fd6d56a62025cafe2076b851c730492cef (diff) | |
| download | box64-af05d7439fa6866f900c7e571812ba3f970bd72b.tar.gz box64-af05d7439fa6866f900c7e571812ba3f970bd72b.zip | |
[INTERPRETER] yet more avx/avx2 opcodes
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/emu/x64runavx.c | 2 |
| -rw-r--r-- | src/emu/x64runavx660f.c | 72 |
| -rw-r--r-- | src/emu/x64runavx660f38.c | 258 |
| -rw-r--r-- | src/emu/x64runavx660f3a.c | 230 |
4 files changed, 555 insertions, 7 deletions
diff --git a/src/emu/x64runavx.c b/src/emu/x64runavx.c index 8b4b9871..b66a275c 100644 --- a/src/emu/x64runavx.c +++ b/src/emu/x64runavx.c @@ -76,7 +76,7 @@ uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) else addr = 0; if(!addr) - printf_log(LOG_NONE, "Unimplemented AVX opcode size %d prefix %s map %s opcode %X ", 128<<vex.l, avx_prefix_string(vex.p), avx_map_string(vex.m), opcode); + printf_log(LOG_NONE, "Unimplemented AVX opcode size %d prefix %s map %s opcode %02X ", 128<<vex.l, avx_prefix_string(vex.p), avx_map_string(vex.m), opcode); return addr; } diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c index a1b089e1..7b712a7e 100644 --- a/src/emu/x64runavx660f.c +++ b/src/emu/x64runavx660f.c @@ -230,7 +230,21 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; } break; - + case 0x57: /* VXORPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GX->u128 = VX->u128 ^ EX->u128; + GETGY; + if(vex.l) { + GETEY; + GETVY; + GY->u128 = VY->u128 ^ EY->u128; + } else { + GY->u128 = 0; + } + break; case 0x58: /* VADDPD Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -829,6 +843,54 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) return 0; } break; + case 0x74: /* VPCMPEQB Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for (int i=0; i<16; ++i) + GX->ub[i] = (VX->ub[i]==EX->ub[i])?0xff:0; + if(vex.l) { + GETEY; + GETVY; + for (int i=0; i<16; ++i) + GY->ub[i] = (VY->ub[i]==EY->ub[i])?0xff:0; + } else + GY->u128 = 0; + break; + case 0x75: /* VPCMPEQW Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for (int i=0; i<8; ++i) + GX->uw[i] = (VX->uw[i]==EX->uw[i])?0xffff:0; + if(vex.l) { + GETEY; + GETVY; + for (int i=0; i<8; ++i) + GY->uw[i] = (VY->uw[i]==EY->uw[i])?0xffff:0; + } else + GY->u128 = 0; + break; + case 0x76: /* VPCMPEQD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for (int i=0; i<4; ++i) + 
GX->ud[i] = (VX->ud[i]==EX->ud[i])?0xffffffff:0; + if(vex.l) { + GETEY; + GETVY; + for (int i=0; i<4; ++i) + GY->ud[i] = (VY->ud[i]==EY->ud[i])?0xffffffff:0; + } else + GY->u128 = 0; + break; case 0x7C: /* VHADDPD Gx, Vx, Ex */ nextop = F8; @@ -1019,6 +1081,14 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0xC5: /* VPEXTRW Gw,Ex,Ib */ + nextop = F8; + GETEX(1); + GETGD; + tmp8u = F8; + GD->q[0] = EX->uw[tmp8u&7]; // 16bits extract, 0 extended + break; + case 0xD0: /* VADDSUBPD Gx, Vx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c index 53a8e345..f127be72 100644 --- a/src/emu/x64runavx660f38.c +++ b/src/emu/x64runavx660f38.c @@ -66,8 +66,8 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) uint64_t tmp64u, tmp64u2; int64_t tmp64s; reg64_t *oped, *opgd; - sse_regs_t *opex, *opgx, *opvx, eax1; - sse_regs_t *opey, *opgy, *opvy, eay1; + sse_regs_t *opex, *opgx, *opvx, eax1, eax2; + sse_regs_t *opey, *opgy, *opvy, eay1, eay2; // AES opcodes constants // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf // A F K P E J O D I N C H M B G L @@ -153,6 +153,176 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->q[0] = GY->q[1] = 0; break; + case 0x01: /* VPHADDW Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + u8 = (VX==EX); + if(GX==EX) {eax1=*EX; EX=&eax1;} + for (int i=0; i<4; ++i) + GX->sw[i] = VX->sw[i*2+0]+VX->sw[i*2+1]; + if(u8) { + GX->q[1] = GX->q[0]; + } else { + for (int i=0; i<4; ++i) + GX->sw[4+i] = EX->sw[i*2+0] + EX->sw[i*2+1]; + } + if(vex.l) { + GETVY; + if(EY==GY) {eay1=*EY; EY=&eay1;} + for (int i=0; i<4; ++i) + GY->sw[i] = VY->sw[i*2+0]+VY->sw[i*2+1]; + if(u8) { + GY->q[1] = GY->q[0]; + } else { + for (int i=0; i<4; ++i) + GY->sw[4+i] = EY->sw[i*2+0] + EY->sw[i*2+1]; + } + } else + GY->u128 = 0; + break; + case 0x02: /* VPHADDD Gx, Vx, Ex */ + nextop = 
F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + u8 = (VX==EX); + if(GX==EX) {eax1=*EX; EX=&eax1;} + GX->sd[0] = VX->sd[0] + VX->sd[1]; + GX->sd[1] = VX->sd[2] + VX->sd[3]; + if(u8) { + GX->q[1] = GX->q[0]; + } else { + GX->sd[2] = EX->sd[0] + EX->sd[1]; + GX->sd[3] = EX->sd[2] + EX->sd[3]; + } + if(vex.l) { + GETVY; + if(EY==GY) {eay1=*EY; EY=&eay1;} + GY->sd[0] = VY->sd[0] + VY->sd[1]; + GY->sd[1] = VY->sd[2] + VY->sd[3]; + if(u8) { + GY->q[1] = GY->q[0]; + } else { + GY->sd[2] = EY->sd[0] + EY->sd[1]; + GY->sd[3] = EY->sd[2] + EY->sd[3]; + } + } else + GY->u128 = 0; + break; + case 0x03: /* VPHADDSW Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + u8 = (VX==EX); + if(GX==EX) {eax1=*EX; EX=&eax1;} + for (int i=0; i<4; ++i) { + tmp32s = VX->sw[i*2+0]+VX->sw[i*2+1]; + GX->sw[i] = (tmp32s<-32768)?-32768:((tmp32s>32767)?32767:tmp32s); + } + if(u8) { + GX->q[1] = GX->q[0]; + } else { + for (int i=0; i<4; ++i) { + tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1]; + GX->sw[4+i] = (tmp32s<-32768)?-32768:((tmp32s>32767)?32767:tmp32s); + } + } + if(vex.l) { + GETVY; + if(EY==GY) {eay1=*EY; EY=&eay1;} + for (int i=0; i<4; ++i) { + tmp32s = VY->sw[i*2+0]+VY->sw[i*2+1]; + GY->sw[i] = (tmp32s<-32768)?-32768:((tmp32s>32767)?32767:tmp32s); + } + if(u8) { + GY->q[1] = GY->q[0]; + } else { + for (int i=0; i<4; ++i) { + tmp32s = EY->sw[i*2+0] + EY->sw[i*2+1]; + GY->sw[4+i] = (tmp32s<-32768)?-32768:((tmp32s>32767)?32767:tmp32s); + } + } + } else + GY->u128 = 0; + break; + + case 0x0C: /* VPERMILPS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + GETVY; + if(GX==VX) {eax1 = *VX; VX = &eax1;} + for(int i=0; i<4; ++i) + GX->ud[i] = VX->ud[EX->ud[i]&3]; + if(vex.l) { + if(GY==VY) {eay1 = *VY; VY = &eay1;} + for(int i=0; i<4; ++i) + GY->ud[i] = VY->ud[EY->ud[i]&3]; + } else + GY->u128 = 0; + break; + case 0x0D: /* VPERMILPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + GETVY; + if(GX==VX) {eax1 = *VX; 
VX = &eax1;} + for(int i=0; i<2; ++i) + GX->q[i] = VX->q[(EX->q[i]>>1)&1]; + if(vex.l) { + if(GY==VY) {eay1 = *VY; VY = &eay1;} + for(int i=0; i<2; ++i) + GY->q[i] = VY->q[(EY->q[i]>>1)&1]; + } else + GY->u128 = 0; + break; + + case 0x16: /* VPERMPS Gx, Vx, Ex */ + // same code as 0x36 + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETEY; + GETGY; + GETVY; + if(!vex.l) emit_signal(emu, SIGILL, (void*)R_RIP, 0); + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + eay1 = *EY; + EY = &eay1; + } + if(GX==VX) { + eax2 = *VX; + VX = &eax2; + eay2 = *VY; + VY = &eay2; + } + for(int i=0; i<4; ++i) { + u8 = VX->ud[i]&7; + GX->ud[i] = (u8>3)?EY->ud[u8&3]:EX->ud[u8]; + } + for(int i=0; i<4; ++i) { + u8 = VY->ud[i]&7; + GY->ud[i] = (u8>3)?EY->ud[u8&3]:EX->ud[u8]; + } + break; case 0x18: /* VBROADCASTSS Gx, Ex */ nextop = F8; @@ -240,6 +410,22 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x29: /* VPCMPEQQ Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for(int i=1; i>=0; --i) + GX->sq[i] = (VX->sq[i]==EX->sq[i])?-1LL:0LL; + if(vex.l) { + GETEY; + GETVY; + for(int i=1; i>=0; --i) + GY->sq[i] = (VY->sq[i]==EY->sq[i])?-1LL:0LL; + } else + GY->u128 = 0; + break; case 0x2A: /* VMOVNTDQA Gx, Ex */ nextop = F8; GETEX(0); @@ -351,6 +537,72 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x36: /* VPERMD Gx, Vx, Ex */ + // same code as 0x16 + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETEY; + GETGY; + GETVY; + if(!vex.l) emit_signal(emu, SIGILL, (void*)R_RIP, 0); + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + eay1 = *EY; + EY = &eay1; + } + if(GX==VX) { + eax2 = *VX; + VX = &eax2; + eay2 = *VY; + VY = &eay2; + } + for(int i=0; i<4; ++i) { + u8 = VX->ud[i]&7; + GX->ud[i] = (u8>3)?EY->ud[u8&3]:EX->ud[u8]; + } + for(int i=0; i<4; ++i) { + u8 = VY->ud[i]&7; + GY->ud[i] = (u8>3)?EY->ud[u8&3]:EX->ud[u8]; + } + break; + case 0x37: /* VPCMPGTQ Gx, Vx, Ex */ + nextop = 
F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for(int i=1; i>=0; --i) + GX->sq[i] = (VX->sq[i]>EX->sq[i])?-1LL:0LL; + if(vex.l) { + GETEY; + GETVY; + for(int i=1; i>=0; --i) + GY->sq[i] = (VY->sq[i]>EY->sq[i])?-1LL:0LL; + } else + GY->u128 = 0; + break; + case 0x38: /* VPERMILD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + GETVY; + if(GX==EX) {eax1 = *EX; EX = &eax1;} + for(int i=0; i<2; ++i) + GX->q[i] = EX->q[(VX->q[i]>>1)&1]; + if(vex.l) { + if(GY==EY) {eay1 = *EY; EY = &eay1;} + for(int i=0; i<2; ++i) + GY->q[i] = EY->q[(VY->q[i]>>1)&1]; + } else + GY->u128 = 0; + break; + case 0x58: /* VPBROADCASTD Gx, Ex */ nextop = F8; GETEX(0); @@ -413,6 +665,7 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x90: /* VPGATHERDD Gx, VSIB, Vx */ case 0x92: /* VGATHERDPD/VGATHERDPS Gx, VSIB, Vx */ nextop = F8; if(((nextop&7)!=4) || MODREG) { @@ -479,6 +732,7 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; + case 0x91: /* VPGATHERQD Gx, VSIB, Vx */ case 0x93: /* VGATHERQPD/VGATHERQPS Gx, VSIB, Vx */ nextop = F8; if(((nextop&7)!=4) || MODREG) { diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c index 16d10af0..36daecad 100644 --- a/src/emu/x64runavx660f3a.c +++ b/src/emu/x64runavx660f3a.c @@ -23,6 +23,7 @@ #include "bridge.h" #include "signals.h" #include "x64shaext.h" +#include "x64compstrings.h" #ifdef DYNAREC #include "custommem.h" #include "../dynarec/native_lock.h" @@ -58,8 +59,8 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) reg64_t *oped, *opgd; float tmpf; double tmpd; - sse_regs_t *opex, *opgx, *opvx, eax1; - sse_regs_t *opey, *opgy, *opvy, eay1; + sse_regs_t *opex, *opgx, *opvx, eax1,eax2; + sse_regs_t *opey, *opgy, *opvy, eay1,eay2; // AES opcodes constants const uint8_t subbytes[256] = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 
0xab, 0x76, @@ -90,6 +91,26 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) switch(opcode) { + case 0x00: /* VPERMQ Gx, Ex, Imm8 */ + case 0x01: /* VPERMPD Gx, Ex, Imm8 */ + nextop = F8; + GETEX(1); + GETGX; + GETGY; + GETEY; + u8 = F8; + if(!vex.l) emit_signal(emu, SIGILL, (void*)R_RIP, 0); + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + eay1 = *EY; + EY = &eay1; + } + for(int i=0; i<2; ++i) + GX->q[i] = (((u8>>(i*2))&3)>1)?EY->q[(u8>>(i*2))&1]:EX->q[(u8>>(i*2))&1]; + for(int i=2; i<4; ++i) + GY->q[i-2] = (((u8>>(i*2))&3)>1)?EY->q[(u8>>(i*2))&1]:EX->q[(u8>>(i*2))&1]; + break; case 0x02: /* VBLENDD Gx, Vx, Ex, u8 */ nextop = F8; GETEX(1); @@ -108,6 +129,90 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x04: /* VPERMILPS Gx, Ex, Imm8 */ + nextop = F8; + GETEX(1); + GETGX; + GETGY; + GETEY; + u8 = F8; + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + } + for(int i=0; i<4; ++i) + GX->ud[i] = EX->ud[(u8>>(i*2))&3]; + if(vex.l) { + if(GY==EY) { + eay1 = *EY; + EY = &eay1; + } + for(int i=0; i<4; ++i) + GY->ud[i] = EY->ud[(u8>>(i*2))&3]; + } else + GY->u128 = 0; + break; + case 0x05: /* VPERMILD Gx, Ex, Imm8 */ + nextop = F8; + GETEX(1); + GETGX; + GETGY; + GETEY; + u8 = F8; + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + } + for(int i=0; i<2; ++i) + GX->q[i] = EX->q[(u8>>i)&1]; + if(vex.l) { + if(GY==EY) { + eay1 = *EY; + EY = &eay1; + } + for(int i=0; i<2; ++i) + GY->q[i] = EY->q[(u8>>(i+2))&1]; + } else + GY->u128 = 0; + break; + case 0x06: /* VPERM2F128 Gx, Vx, Ex, Imm8 */ + nextop = F8; + GETEX(1); + GETGX; + GETVX; + GETEY; + GETGY; + GETVY; + u8 = F8; + if(!vex.l) emit_signal(emu, SIGILL, (void*)R_RIP, 0); + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + eay1 = *EY; + EY = &eay1; + } + if(GX==VX) { + eax2 = *VX; + VX = &eax2; + eay2 = *VY; + VY = &eay2; + } + switch(u8&0x0f) { + case 0 : GX->u128 = VX->u128; break; + case 1 : GX->u128 = VY->u128; break; + case 2 : GX->u128 = EX->u128; 
break; + case 3 : GX->u128 = EY->u128; break; + default: GX->u128 = 0; break; + } + switch((u8>>4)&0x0f) { + case 0 : GY->u128 = VX->u128; break; + case 1 : GY->u128 = VY->u128; break; + case 2 : GY->u128 = EX->u128; break; + case 3 : GY->u128 = EY->u128; break; + default: GY->u128 = 0; break; + } + break; + case 0x0C: /* VBLENDPS Gx, Vx, Ex, u8 */ nextop = F8; GETEX(1); @@ -191,6 +296,26 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x14: // VPEXTRB ED, GX, u8 + nextop = F8; + GETED(1); + GETGX; + tmp8u = F8; + if(MODREG) + ED->q[0] = GX->ub[tmp8u&0x0f]; + else + ED->byte[0] = GX->ub[tmp8u&0x0f]; + break; + case 0x15: // VPEXTRW Ew,Gx,Ib + nextop = F8; + GETED(1); + GETGX; + tmp8u = F8; + if(MODREG) + ED->q[0] = GX->uw[tmp8u&7]; // 16bits extract, 0 extended + else + ED->word[0] = GX->uw[tmp8u&7]; + break; case 0x16: // VPEXTRD/Q ED, GX, u8 nextop = F8; GETED(1); @@ -414,6 +539,44 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x46: /* VPERM2I128 Gx, Vx, Ex, Imm8 */ + nextop = F8; + GETEX(1); + GETGX; + GETVX; + GETEY; + GETGY; + GETVY; + u8 = F8; + if(!vex.l) emit_signal(emu, SIGILL, (void*)R_RIP, 0); + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + eay1 = *EY; + EY = &eay1; + } + if(GX==VX) { + eax2 = *VX; + VX = &eax2; + eay2 = *VY; + VY = &eay2; + } + switch(u8&0x0f) { + case 0 : GX->u128 = VX->u128; break; + case 1 : GX->u128 = VY->u128; break; + case 2 : GX->u128 = EX->u128; break; + case 3 : GX->u128 = EY->u128; break; + default: GX->u128 = 0; break; + } + switch((u8>>4)&0x0f) { + case 0 : GY->u128 = VX->u128; break; + case 1 : GY->u128 = VY->u128; break; + case 2 : GY->u128 = EX->u128; break; + case 3 : GY->u128 = EY->u128; break; + default: GY->u128 = 0; break; + } + break; + case 0x4A: /* VBLENDVPS Gx, Vx, Ex, XMMImm8 */ nextop = F8; GETEX(1); @@ -433,7 +596,7 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int 
*step) break; case 0x4B: /* VBLENDVPD Gx, Vx, Ex, XMMImm8 */ nextop = F8; - GETEX(0); + GETEX(1); GETGX; GETVX; GETGY; @@ -469,6 +632,67 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x60: /* VPCMPESTRM */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u); + if(tmp8u&0b1000000) { + switch(tmp8u&1) { + case 0: for(int i=0; i<16; ++i) emu->xmm[0].ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break; + case 1: for(int i=0; i<8; ++i) emu->xmm[0].uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break; + } + } else { + emu->xmm[0].q[1] = emu->xmm[0].q[0] = 0; + emu->xmm[0].uw[0] = tmp32u; + emu->ymm[0].u128 = 0; + } + break; + case 0x61: /* VPCMPESTRI */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u); + if(!tmp32u) + R_RCX = (tmp8u&1)?8:16; + else if(tmp8u&0b1000000) + R_RCX = 31-__builtin_clz(tmp32u); + else + R_RCX = __builtin_ffs(tmp32u) - 1; + break; + case 0x62: /* VPCMPISTRM */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u); + if(tmp8u&0b1000000) { + switch(tmp8u&1) { + case 0: for(int i=0; i<16; ++i) emu->xmm[0].ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break; + case 1: for(int i=0; i<8; ++i) emu->xmm[0].uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break; + } + } else { + emu->xmm[0].q[1] = emu->xmm[0].q[0] = 0; + emu->xmm[0].uw[0] = tmp32u; + emu->ymm[0].u128 = 0; + } + break; + case 0x63: /* VPCMPISTRI */ + nextop = F8; + GETEX(1); + GETGX; + tmp8u = F8; + tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u); + if(!tmp32u) + R_RCX = (tmp8u&1)?8:16; + else if(tmp8u&0b1000000) + R_RCX = 31-__builtin_clz(tmp32u); + else + R_RCX = __builtin_ffs(tmp32u) - 1; + break; + case 0xDF: // VAESKEYGENASSIST Gx, Ex, u8 nextop = F8; GETEX(1); |