diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-28 14:55:17 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-28 14:55:17 +0200 |
| commit | b79f86b8d1c864cc58d79730a628e72c56ea960d (patch) | |
| tree | cab9d2fb0095d8dfe529b7645fbb05dfde8649e0 /src | |
| parent | b9f5929439ab5e0ebf9d64b2dc2659a9a018f19d (diff) | |
| download | box64-b79f86b8d1c864cc58d79730a628e72c56ea960d.tar.gz box64-b79f86b8d1c864cc58d79730a628e72c56ea960d.zip | |
[INTERPRETER] More avx/avx2 opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/emu/x64runavx0f.c | 33 | ||||
| -rw-r--r-- | src/emu/x64runavx660f.c | 64 | ||||
| -rw-r--r-- | src/emu/x64runavx660f38.c | 136 | ||||
| -rw-r--r-- | src/emu/x64runavx660f3a.c | 53 | ||||
| -rw-r--r-- | src/emu/x64runavxf20f.c | 115 | ||||
| -rw-r--r-- | src/emu/x64runavxf30f.c | 137 |
6 files changed, 532 insertions, 6 deletions
diff --git a/src/emu/x64runavx0f.c b/src/emu/x64runavx0f.c index 34372ca6..8fec79f9 100644 --- a/src/emu/x64runavx0f.c +++ b/src/emu/x64runavx0f.c @@ -268,7 +268,20 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; - + case 0x5A: /* VCVTPS2PD Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETGY; + if(vex.l) { + GETEY; + GY->d[1] = EX->f[3]; + GY->d[0] = EX->f[2]; + } else + GY->u128 = 0; + GX->d[1] = EX->f[1]; + GX->d[0] = EX->f[0]; + break; case 0x5B: /* VCVTDQ2PS Gx, Ex */ nextop = F8; GETEX(0); @@ -304,6 +317,24 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x5E: /* VDIVPS Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for(int i=0; i<4; ++i) + GX->f[i] = VX->f[i] / EX->f[i]; + if(vex.l) { + GETEY; + GETVY; + for(int i=0; i<4; ++i) + GY->f[i] = VY->f[i] / EY->f[i]; + } else + GY->u128 = 0; + break; + + case 0x77: if(!vex.l) { // VZEROUPPER if(vex.v!=0) { diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c index 3136433f..c8f23596 100644 --- a/src/emu/x64runavx660f.c +++ b/src/emu/x64runavx660f.c @@ -47,6 +47,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) reg64_t *oped, *opgd; sse_regs_t *opex, *opgx, *opvx, eax1; sse_regs_t *opey, *opgy, *opvy, eay1; + int is_nan; #ifdef TEST_INTERPRETER @@ -205,7 +206,40 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; - + + case 0x5E: /* VDIVPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for (int i=0; i<2; ++i) { + #ifndef NOALIGN + is_nan = isnan(VX->d[i]) || isnan(EX->d[i]); + #endif + GX->d[i] = VX->d[i] / EX->d[i]; + #ifndef NOALIGN + if(!is_nan && isnan(GX->d[i])) + GX->d[i] = -NAN; + #endif + } + if(vex.l) { + GETEY; + GETVY; + for (int i=0; i<2; ++i) { + #ifndef NOALIGN + is_nan = isnan(VY->d[i]) || isnan(EY->d[i]); + #endif + GY->d[i] = VY->d[i] / EY->d[i]; + #ifndef NOALIGN + if(!is_nan && isnan(GY->d[i])) + GY->d[i] = -NAN; + #endif + } + } else + GY->u128 = 0; + break; + case 0x64: /* VPCMPGTB Gx,Vx, Ex */ nextop = F8; GETEX(0); @@ -615,6 +649,34 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0xE6: /* CVTTPD2DQ Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETGY; + if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff) + GX->sd[0] = 0x80000000; + else + GX->sd[0] = EX->d[0]; + if(isnan(EX->d[1]) || isinf(EX->d[1]) || EX->d[1]>0x7fffffff) + GX->sd[1] = 0x80000000; + else + GX->sd[1] = EX->d[1]; + if(vex.l) { + GETEY; + if(isnan(EY->d[0]) || isinf(EY->d[0]) || EY->d[0]>0x7fffffff) + GX->sd[2] = 0x80000000; + else + GX->sd[2] = EY->d[0]; + if(isnan(EY->d[1]) || isinf(EY->d[1]) || EY->d[1]>0x7fffffff) + GX->sd[3] = 0x80000000; + else + GX->sd[3] = EY->d[1]; + } else + GX->q[1] = 0; + GY->u128 = 0; + break; + case 0xEB: /* VPOR Gx, Vx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c index 3ec1f0ff..ae6d1cbf 100644 --- a/src/emu/x64runavx660f38.c +++ b/src/emu/x64runavx660f38.c @@ -59,7 +59,7 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) { uint8_t opcode; uint8_t nextop; - uint8_t tmp8u; + uint8_t tmp8u, u8; int8_t tmp8s; int32_t tmp32s, tmp32s2; uint32_t tmp32u, tmp32u2; @@ -233,6 +233,140 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = EX->u128; break; + case 0x92: /* VGATHERDPD/VGATHERDPS Gx, VSIB, Vx */ + nextop = F8; + if(((nextop&7)!=4) || MODREG) { + emit_signal(emu, SIGILL, (void*)R_RIP, 0); + } + GETGX; + GETVX; + GETGY; + GETVY; + tmp8u = F8; //SIB + // compute base + tmp64u = emu->regs[(tmp8u&0x7)+(rex.b<<3)].q[0]; + if(nextop&0x40) + tmp64u += F8S; + else if(nextop&0x80) + tmp64u += F32S; + // get vxmm + EX = &emu->xmm[((tmp8u>>3)&7)+(rex.x<<3)]; + EY = &emu->ymm[((tmp8u>>3)&7)+(rex.x<<3)]; + u8 = tmp8u>>6; + // prepare mask + if(!vex.l) + VY->u128 = 0; + if(rex.w) + for(int i=0; i<2; ++i) + VX->sq[i]>>=63; + else + for(int i=0; i<4; ++i) + VX->sd[i]>>=31; + // go gather + if(rex.w) { + for(int i=0; i<2; ++i) + if(VX->q[i]) { + GX->q[i] = *(uint64_t*)(tmp64u + (EX->sd[i]<<u8)); + VX->q[i] = 0; + } + } else { + for(int i=0; i<4; ++i) + if(VX->ud[i]) { + GX->ud[i] = *(uint32_t*)(tmp64u + (EX->sd[i]<<u8)); + VX->ud[i] = 0; + } + } + if(vex.l) { + if(rex.w) + for(int i=0; i<2; ++i) + VY->sq[i]>>=63; + else + for(int i=0; i<4; ++i) + VY->sd[i]>>=31; + if(rex.w) { + for(int i=0; i<2; ++i) + if(VY->q[i]) { + GY->q[i] = *(uint64_t*)(tmp64u + (EX->sd[2+i]<<u8)); + VY->q[i] = 0; + } + } else { + for(int i=0; i<4; ++i) + if(VY->ud[i]) { + GY->ud[i] = *(uint32_t*)(tmp64u + (EY->sd[i]<<u8)); + VY->ud[i] = 0; + } + } + } else + GY->u128 = 0; + break; + case 0x93: /* VGATHERQPD/VGATHERQPS Gx, VSIB, Vx */ + nextop = F8; + if(((nextop&7)!=4) || MODREG) { + emit_signal(emu, SIGILL, (void*)R_RIP, 0); + } + GETGX; + GETVX; + GETGY; + GETVY; + tmp8u = F8; //SIB + // compute base + tmp64u = emu->regs[(tmp8u&0x7)+(rex.b<<3)].q[0]; + if(nextop&0x40) + tmp64u += F8S; + else if(nextop&0x80) + tmp64u += F32S; + // get vxmm + EX = &emu->xmm[((tmp8u>>3)&7)+(rex.x<<3)]; + EY = &emu->ymm[((tmp8u>>3)&7)+(rex.x<<3)]; + u8 = tmp8u>>6; + // prepare mask + if(!vex.l) { + VY->u128 = 0; + } + if(!vex.l || !rex.w) + GY->u128 = 0; + if(rex.w) + for(int i=0; i<2; ++i) + VX->sq[i]>>=63; + else + for(int i=0; i<4; ++i) + VX->sd[i]>>=31; + // go gather + if(rex.w) { + for(int i=0; i<2; ++i) + if(VX->q[i]) { + GX->q[i] = *(uint64_t*)(tmp64u + (EX->sq[i]<<u8)); + VX->q[i] = 0; + } + } else { + for(int i=0; i<(vex.l?4:2); ++i) + if(VX->ud[i]) { + GX->ud[i] = *(uint32_t*)(tmp64u + (((i>1)?EY->sq[i-2]:EX->sq[i])<<u8)); + VX->ud[i] = 0; + } + } + if(vex.l) { + if(rex.w) + for(int i=0; i<2; ++i) + VY->sq[i]>>=63; + else + VY->u128=0; + if(rex.w) { + for(int i=0; i<2; ++i) + if(VY->q[i]) { + GY->q[i] = *(uint64_t*)(tmp64u + (EY->sq[i]<<u8)); + VY->q[i] = 0; + } + } else { + VY->u128 = 0; + } + } + if(!rex.w && !vex.l) { + GX->q[1] = 0; + VX->q[1] = 0; + } + break; + case 0xDB: /* VAESIMC Gx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c index cc5e784d..e09de6bf 100644 --- a/src/emu/x64runavx660f3a.c +++ b/src/emu/x64runavx660f3a.c @@ -57,6 +57,7 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) int64_t tmp64s; reg64_t *oped, *opgd; float tmpf; + double tmpd; sse_regs_t *opex, *opgx, *opvx, eax1; sse_regs_t *opey, *opgy, *opvy, eay1; // AES opcodes constants @@ -170,6 +171,14 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x17: // VEXTRACTPS ED, GX, u8 + nextop = F8; + GETED(1); + GETGX; + tmp8u = F8; + ED->dword[0] = GX->ud[tmp8u&3]; + if(MODREG) ED->dword[1] = 0; + break; case 0x18: /* VINSERTF128 Gx, Ex, imm8 */ nextop = F8; GETEX(1); @@ -217,7 +226,20 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; - case 0x40: /* DPPS Gx, Ex, Ib */ + case 0x39: /* VEXTRACTI128 Ex, Gx, Ib */ + nextop = F8; + GETGX; + GETEX(1); + GETGY; + tmp8u = F8; + EX->u128 = (tmp8u&1)?GY->u128:GX->u128; + if(MODREG) { + GETEY; + EY->u128 = 0; + } + break; + + case 0x40: /* VDPPS Gx, VX, Ex, Ib */ nextop = F8; GETEX(1); GETGX; @@ -231,6 +253,8 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) for(int i=0; i<4; ++i) GX->f[i] = (tmp8u&(1<<i))?tmpf:0.0f; if(vex.l) { + GETEY; + GETVY; tmpf = 0.0f; for(int i=0; i<4; ++i) if(tmp8u&(1<<(i+4))) @@ -240,6 +264,33 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; + case 0x41: /* VDPPD Gx, Vx, Ex, Ib */ + nextop = F8; + GETEX(1); + GETGX; + GETVX; + GETGY; + tmp8u = F8; + tmpd = 0.0; + if(tmp8u&(1<<(4+0))) + tmpd += VX->d[0]*EX->d[0]; + if(tmp8u&(1<<(4+1))) + tmpd += VX->d[1]*EX->d[1]; + GX->d[0] = (tmp8u&(1<<(0)))?tmpd:0.0; + GX->d[1] = (tmp8u&(1<<(1)))?tmpd:0.0; + if(vex.l) { + GETEY; + GETVY; + tmpd = 0.0; + if(tmp8u&(1<<(4+0))) + tmpd += VY->d[0]*EY->d[0]; + if(tmp8u&(1<<(4+1))) + tmpd += VY->d[1]*EY->d[1]; + GY->d[0] = (tmp8u&(1<<(0)))?tmpd:0.0; + GY->d[1] = (tmp8u&(1<<(1)))?tmpd:0.0; + } else + GY->u128 = 0; + break; case 0x44: /* VPCLMULQDQ Gx, Vx, Ex, imm8 */ nextop = F8; diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c index 642946b5..fc51a3a4 100644 --- a/src/emu/x64runavxf20f.c +++ b/src/emu/x64runavxf20f.c @@ -48,6 +48,7 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) reg64_t *oped, *opgd; sse_regs_t *opex, *opgx, *opvx, eax1; sse_regs_t *opey, *opgy, *opvy, eay1; + int is_nan; #ifdef TEST_INTERPRETER @@ -86,6 +87,90 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x2A: /* VCVTSI2SD Gx, Vx, Ed */ + nextop = F8; + GETED(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = ED->sq[0]; + } else { + GX->d[0] = ED->sdword[0]; + } + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; + + case 0x2C: /* VCVTTSD2SI Gd, Ex */ + nextop = F8; + GETEX(0); + GETGD; + if(rex.w) + if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffffffffffffLL) + GD->q[0] = 0x8000000000000000LL; + else + GD->sq[0] = EX->d[0]; + else { + if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff) + GD->dword[0] = 0x80000000; + else + GD->sdword[0] = EX->d[0]; + GD->dword[1] = 0; + } + break; + case 0x2D: /* VCVTSD2SI Gd, Ex */ + nextop = F8; + GETEX(0); + GETGD; + if(rex.w) { + if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffffffffffffLL) + GD->q[0] = 0x8000000000000000LL; + else + switch(emu->mxcsr.f.MXCSR_RC) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GD->sq[0] = nearbyint(EX->d[0]); + fesetround(round); + break; + } + case ROUND_Down: + GD->sq[0] = floor(EX->d[0]); + break; + case ROUND_Up: + GD->sq[0] = ceil(EX->d[0]); + break; + case ROUND_Chop: + GD->sq[0] = EX->d[0]; + break; + } + } else { + if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff) + GD->dword[0] = 0x80000000; + else + switch(emu->mxcsr.f.MXCSR_RC) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GD->sdword[0] = nearbyint(EX->d[0]); + fesetround(round); + break; + } + case ROUND_Down: + GD->sdword[0] = floor(EX->d[0]); + break; + case ROUND_Up: + GD->sdword[0] = ceil(EX->d[0]); + break; + case ROUND_Chop: + GD->sdword[0] = EX->d[0]; + break; + } + GD->dword[1] = 0; + } + break; + case 0x58: /* VADDSD Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -99,6 +184,36 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x5A: /* VCVTSD2SS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GX->f[0] = EX->d[0]; + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; + + case 0x5E: /* VDIVSD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + #ifndef NOALIGN + is_nan = isnan(VX->d[0]) || isnan(EX->d[0]); + #endif + GX->d[0] = VX->d[0] / EX->d[0]; + #ifndef NOALIGN + if(!is_nan && isnan(GX->d[0])) + GX->d[0] = -NAN; + #endif + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; + case 0xC2: /* VCMPSD Gx, Vx, Ex, Ib */ nextop = F8; GETEX(1); diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c index 98fb8b4d..9afb1c10 100644 --- a/src/emu/x64runavxf30f.c +++ b/src/emu/x64runavxf30f.c @@ -87,6 +87,94 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x2A: /* VCVTSI2SS Gx, Vx, Ed */ + nextop = F8; + GETED(0); + GETGX; + GETVX; + GETGY; + if(rex.w) + GX->f[0] = ED->sq[0]; + else + GX->f[0] = ED->sdword[0]; + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; + + case 0x2C: /* VCVTTSS2SI Gd, Ex */ + nextop = F8; + GETEX(0); + GETGD; + if (rex.w) { + if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>(float)0x7fffffffffffffffLL) + GD->q[0] = 0x8000000000000000LL; + else + GD->sq[0] = EX->f[0]; + } else { + if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffff) + GD->dword[0] = 0x80000000; + else + GD->sdword[0] = EX->f[0]; + GD->dword[1] = 0; + } + break; + case 0x2D: /* VCVTSS2SI Gd, Ex */ + nextop = F8; + GETEX(0); + GETGD; + if(rex.w) { + if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>(float)0x7fffffffffffffffLL) + GD->q[0] = 0x8000000000000000LL; + else + switch(emu->mxcsr.f.MXCSR_RC) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GD->sq[0] = nearbyintf(EX->f[0]); + fesetround(round); + break; + } + case ROUND_Down: + GD->sq[0] = floorf(EX->f[0]); + break; + case ROUND_Up: + GD->sq[0] = ceilf(EX->f[0]); + break; + case ROUND_Chop: + GD->sq[0] = EX->f[0]; + break; + } + } else { + if(isnanf(EX->f[0])) + tmp64s = INT32_MIN; + else + switch(emu->mxcsr.f.MXCSR_RC) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + tmp64s = nearbyintf(EX->f[0]); + fesetround(round); + break; + } + case ROUND_Down: + tmp64s = floorf(EX->f[0]); + break; + case ROUND_Up: + tmp64s = ceilf(EX->f[0]); + break; + case ROUND_Chop: + tmp64s = EX->f[0]; + break; + } + if (tmp64s==(int32_t)tmp64s) + GD->sdword[0] = (int32_t)tmp64s; + else + GD->sdword[0] = INT32_MIN; + GD->dword[1] = 0; + } + break; + case 0x58: /* VADDSS Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -111,7 +199,38 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GX->q[1] = VX->q[1]; GY->q[0] = GY->q[1] = 0; break; - + case 0x5B: /* VCVTTPS2DQ Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETGY; + for(int i=0; i<4; ++i) { + if(isnanf(EX->f[i])) + tmp64s = INT32_MIN; + else + tmp64s = EX->f[i]; + if (tmp64s==(int32_t)tmp64s) { + GX->sd[i] = (int32_t)tmp64s; + } else { + GX->sd[i] = INT32_MIN; + } + } + if(vex.l) { + GETEY; + for(int i=0; i<4; ++i) { + if(isnanf(EY->f[i])) + tmp64s = INT32_MIN; + else + tmp64s = EY->f[i]; + if (tmp64s==(int32_t)tmp64s) { + GY->sd[i] = (int32_t)tmp64s; + } else { + GY->sd[i] = INT32_MIN; + } + } + } else + GY->u128 = 0; + break; case 0x5C: /* VSUBSS Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -123,7 +242,21 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GX->ud[1] = VX->ud[1]; GX->q[1] = VX->q[1]; } - GY->q[0] = GY->q[1] = 0; + GY->u128 = 0; + break; + + case 0x5E: /* VDIVSS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GX->f[0] = VX->f[0] / EX->f[0]; + if(GX!=VX) { + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + } + GY->u128 = 0; break; case 0x6F: // VMOVDQU Gx, Ex |