diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-29 16:36:43 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-29 16:36:43 +0200 |
| commit | 22bc6872f4aac9964b8bc0eca0540cd4dfbef629 (patch) | |
| tree | 8e80348af4ac25a683f20dde0acd5be372ccd694 /src | |
| parent | f6fe84afad62e3cda2855ef41c09580ec0dde75a (diff) | |
| download | box64-22bc6872f4aac9964b8bc0eca0540cd4dfbef629.tar.gz box64-22bc6872f4aac9964b8bc0eca0540cd4dfbef629.zip | |
[INTERPRETER] Last batch of avx/avx2 opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/emu/x64runavx0f.c | 73 | ||||
| -rw-r--r-- | src/emu/x64runavx660f.c | 87 | ||||
| -rw-r--r-- | src/emu/x64runavx660f38.c | 52 | ||||
| -rw-r--r-- | src/emu/x64runavx660f3a.c | 179 | ||||
| -rw-r--r-- | src/emu/x64runavxf20f.c | 26 | ||||
| -rw-r--r-- | src/emu/x64runavxf30f.c | 47 |
6 files changed, 451 insertions, 13 deletions
diff --git a/src/emu/x64runavx0f.c b/src/emu/x64runavx0f.c index 8283cb72..adead48d 100644 --- a/src/emu/x64runavx0f.c +++ b/src/emu/x64runavx0f.c @@ -129,7 +129,23 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; - + case 0x15: /* VUNPCKHPS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + GX->ud[0] = VX->ud[2]; + GX->ud[1] = EX->ud[2]; + GX->ud[2] = VX->ud[3]; + GX->ud[3] = EX->ud[3]; + if(vex.l) { + GETEY; GETVY; + GY->ud[0] = VY->ud[2]; + GY->ud[1] = EY->ud[2]; + GY->ud[2] = VY->ud[3]; + GY->ud[3] = EY->ud[3]; + } else + GY->u128 = 0; + break; case 0x16: nextop = F8; GETEX(0); @@ -194,6 +210,7 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x2E: /* VUCOMISS Gx, Ex */ case 0x2F: /* VCOMISS Gx, Ex */ RESET_FLAGS(emu); nextop = F8; @@ -224,7 +241,19 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GD->dword[0] |= ((EY->ud[i]>>31)&1)<<(i+4); } break; - + case 0x51: /* VSQRTPS Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETGY; + for(int i=0; i<4; ++i) + GX->f[i] = (EX->f[i]<0)?(-NAN):sqrtf(EX->f[i]); + if(vex.l) { + GETEY; + for(int i=0; i<4; ++i) + GY->f[i] = (EY->f[i]<0)?(-NAN):sqrtf(EY->f[i]); + } else + GY->u128 = 0; + break; case 0x52: /* VRSQRTPS Gx, Ex */ nextop = F8; GETEX(0); @@ -262,7 +291,22 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) test->notest = 1; #endif break; - + case 0x53: /* VRCPPS Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETGY; + for(int i=0; i<4; ++i) + GX->f[i] = 1.0f/EX->f[i]; + if(vex.l) { + GETEY; + for(int i=0; i<4; ++i) + GY->f[i] = 1.0f/EY->f[i]; + } else + GY->u128 = 0; + #ifdef TEST_INTERPRETER + test->notest = 1; + #endif + break; case 0x54: /* VANDPS Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -475,6 +519,29 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) return 0; break; + case 0xAE: /* Grp Ed (SSE) */ + nextop = F8; + if(MODREG) + return 0; + else + switch((nextop>>3)&7) { + case 2: /* VLDMXCSR Md */ + GETED(0); + emu->mxcsr.x32 = ED->dword[0]; + #ifndef TEST_INTERPRETER + if(box64_sse_flushto0) + applyFlushTo0(emu); + #endif + break; + case 3: /* VSTMXCSR Md */ + GETED(0); + ED->dword[0] = emu->mxcsr.x32; + break; + default: + return 0; + } + break; + case 0xC2: /* VCMPPS Gx, Vx, Ex, Ib */ nextop = F8; GETEX(1); diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c index 3425a864..b136738b 100644 --- a/src/emu/x64runavx660f.c +++ b/src/emu/x64runavx660f.c @@ -99,7 +99,32 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GETGX; ED->q[0] = GX->q[0]; break; - + case 0x14: /* VUNPCKLPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + GX->q[1] = EX->q[0]; + GX->q[0] = VX->q[0]; + if(vex.l) { + GETEY; GETVY; + GY->q[1] = EY->q[0]; + GY->q[0] = VY->q[0]; + } else + GY->u128 = 0; + break; + case 0x15: /* VUNPCKHPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + GX->q[0] = VX->q[1]; + GX->q[1] = EX->q[1]; + if(vex.l) { + GETEY; GETVY; + GY->q[0] = VY->q[1]; + GY->q[1] = EY->q[1]; + } else + GY->u128 = 0; + break; case 0x16: /* VMOVHPD Gx, Vx, Ed */ nextop = F8; GETE8(0); @@ -159,6 +184,7 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x2E: /* VUCOMISD Gx, Ex */ case 0x2F: /* VCOMISD Gx, Ex */ RESET_FLAGS(emu); nextop = F8; @@ -184,6 +210,31 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) for(int i=0; i<2; ++i) GD->dword[0] |= ((EX->q[i]>>63)&1)<<i; break; + case 0x51: /* VSQRTPD Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETGY; + for (int i=0; i<2; ++i) { + #ifndef NOALIGN + if(EX->d[i]<0.0) // on x86, default nan are negative + GX->d[i] = -NAN; // but input NAN are not touched (so sqrt(+nan) -> +nan) + else + #endif + GX->d[i] = sqrt(EX->d[i]); + } + if(vex.l) { + GETEY; + for (int i=0; i<2; ++i) { + #ifndef NOALIGN + if(EY->d[i]<0.0) + GY->d[i] = -NAN; + else + #endif + GY->d[i] = sqrt(EY->d[i]); + } + } else + GY->u128 = 0; + break; case 0x54: /* VANDPD Gx, Vx, Ex */ nextop = F8; @@ -373,7 +424,23 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; - + case 0x5C: /* VSUBPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GX->d[0] = VX->d[0] - EX->d[0]; + GX->d[1] = VX->d[1] - EX->d[1]; + GETGY; + if(vex.l) { + GETEY; + GETVY; + GY->d[0] = VY->d[0] - EY->d[0]; + GY->d[1] = VY->d[1] - EY->d[1]; + } else { + GY->u128 = 0; + } + break; case 0x5D: /* VMINPD Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -1232,6 +1299,22 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) tmp8u = F8; GD->q[0] = EX->uw[tmp8u&7]; // 16bits extract, 0 extended break; + case 0xC6: /* VSHUFPD Gx, Vx, Ex, Ib */ + nextop = F8; + GETEX(1); + GETGX; GETVX; GETGY; + tmp8u = F8; + eax1.q[0] = VX->q[tmp8u&1]; + eax1.q[1] = EX->q[(tmp8u>>1)&1]; + GX->u128 = eax1.u128; + if(vex.l) { + GETEY; GETVY; + eax1.q[0] = VY->q[(tmp8u>>2)&1]; + eax1.q[1] = EY->q[(tmp8u>>3)&1]; + GY->u128 = eax1.u128; + } else + GY->u128 = 0; + break; case 0xD0: /* VADDSUBPD Gx, Vx, Ex */ nextop = F8; diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c index e4de81c2..24acf83c 100644 --- a/src/emu/x64runavx660f38.c +++ b/src/emu/x64runavx660f38.c @@ -468,6 +468,58 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; + case 0x0E: /* VTESTPS Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + if(vex.l) {GETEY; GETGY;} + // ZF + u8 = 0; + for(int i=0; i<4 && !u8; ++i) + u8 |= ((EX->ud[i]>>31)&(GX->ud[i]>>31)); + if(vex.l && !u8) + for(int i=0; i<4 && !u8; ++i) + u8 |= ((EY->ud[i]>>31)&(GY->ud[i]>>31)); + CONDITIONAL_SET_FLAG(!u8, F_ZF); + // CF + u8 = 0; + for(int i=0; i<4 && !u8; ++i) + u8 |= ((EX->ud[i]>>31)&((~GX->ud[i])>>31)); + if(vex.l && !u8) + for(int i=0; i<4 && !u8; ++i) + u8 |= ((EY->ud[i]>>31)&((~GY->ud[i])>>31)); + CONDITIONAL_SET_FLAG(!u8, F_CF); + CLEAR_FLAG(F_AF); + CLEAR_FLAG(F_OF); + CLEAR_FLAG(F_SF); + CLEAR_FLAG(F_PF); + break; + case 0x0F: /* VTESTPD Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + if(vex.l) {GETEY; GETGY;} + // ZF + u8 = 0; + for(int i=0; i<2 && !u8; ++i) + u8 |= ((EX->q[i]>>63)&(GX->q[i]>>63)); + if(vex.l && !u8) + for(int i=0; i<2 && !u8; ++i) + u8 |= ((EY->q[i]>>63)&(GY->q[i]>>63)); + CONDITIONAL_SET_FLAG(!u8, F_ZF); + // CF + u8 = 0; + for(int i=0; i<2 && !u8; ++i) + u8 |= ((EX->q[i]>>63)&((~GX->q[i])>>63)); + if(vex.l && !u8) + for(int i=0; i<2 && !u8; ++i) + u8 |= ((EY->q[i]>>63)&((~GY->q[i])>>63)); + CONDITIONAL_SET_FLAG(!u8, F_CF); + CLEAR_FLAG(F_AF); + CLEAR_FLAG(F_OF); + CLEAR_FLAG(F_SF); + CLEAR_FLAG(F_PF); + break; case 0x16: /* VPERMPS Gx, Vx, Ex */ // same code as 0x36 diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c index baf9fb21..eca2dff0 100644 --- a/src/emu/x64runavx660f3a.c +++ b/src/emu/x64runavx660f3a.c @@ -213,6 +213,185 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x08: // VROUNDPS Gx, Ex, u8 + nextop = F8; + GETEX(1); + GETGX; GETGY; + tmp8u = F8; // ignoring bit 3 interupt thingy + if(tmp8u&4) + tmp8u = emu->mxcsr.f.MXCSR_RC; + else + tmp8u &= 3; + switch(tmp8u) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + for(int i=0; i<4; ++i) + GX->f[i] = nearbyintf(EX->f[i]); + fesetround(round); + break; + } + case ROUND_Down: + for(int i=0; i<4; ++i) + GX->f[i] = floorf(EX->f[i]); + break; + case ROUND_Up: + for(int i=0; i<4; ++i) + GX->f[i] = ceilf(EX->f[i]); + break; + case ROUND_Chop: + for(int i=0; i<4; ++i) + GX->f[i] = truncf(EX->f[i]); + break; + } + if(vex.l) { + GETEY; + switch(tmp8u) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + for(int i=0; i<4; ++i) + GY->f[i] = nearbyintf(EY->f[i]); + fesetround(round); + break; + } + case ROUND_Down: + for(int i=0; i<4; ++i) + GY->f[i] = floorf(EY->f[i]); + break; + case ROUND_Up: + for(int i=0; i<4; ++i) + GY->f[i] = ceilf(EY->f[i]); + break; + case ROUND_Chop: + for(int i=0; i<4; ++i) + GY->f[i] = truncf(EY->f[i]); + break; + } + } else + GY->u128 = 0; + break; + case 0x09: // VROUNDPD Gx, Ex, u8 + nextop = F8; + GETEX(1); + GETGX; GETGY; + tmp8u = F8; // ignoring bit 3 interupt thingy + if(tmp8u&4) + tmp8u = emu->mxcsr.f.MXCSR_RC; + else + tmp8u &= 3; + switch(tmp8u) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GX->d[0] = nearbyint(EX->d[0]); + GX->d[1] = nearbyint(EX->d[1]); + fesetround(round); + break; + } + case ROUND_Down: + GX->d[0] = floor(EX->d[0]); + GX->d[1] = floor(EX->d[1]); + break; + case ROUND_Up: + GX->d[0] = ceil(EX->d[0]); + GX->d[1] = ceil(EX->d[1]); + break; + case ROUND_Chop: + GX->d[0] = trunc(EX->d[0]); + GX->d[1] = trunc(EX->d[1]); + break; + } + if(vex.l) { + GETEY; + switch(tmp8u) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GY->d[0] = nearbyint(EY->d[0]); + GY->d[1] = nearbyint(EY->d[1]); + fesetround(round); + break; + } + case ROUND_Down: + GY->d[0] = floor(EY->d[0]); + GY->d[1] = floor(EY->d[1]); + break; + case ROUND_Up: + GY->d[0] = ceil(EY->d[0]); + GY->d[1] = ceil(EY->d[1]); + break; + case ROUND_Chop: + GY->d[0] = trunc(EY->d[0]); + GY->d[1] = trunc(EY->d[1]); + break; + } + } else + GY->u128 = 0; + break; + case 0x0A: // VROUNDSS Gx, Vx, Ex, u8 + nextop = F8; + GETEX(1); + GETGX; GETVX; GETGY; + tmp8u = F8; // ignoring bit 3 interupt thingy + if(tmp8u&4) + tmp8u = emu->mxcsr.f.MXCSR_RC; + else + tmp8u &= 3; + switch(tmp8u) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GX->f[0] = nearbyintf(EX->f[0]); + fesetround(round); + break; + } + case ROUND_Down: + GX->f[0] = floorf(EX->f[0]); + break; + case ROUND_Up: + GX->f[0] = ceilf(EX->f[0]); + break; + case ROUND_Chop: + GX->f[0] = truncf(EX->f[0]); + break; + } + if(GX!=VX) { + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + } + GY->u128 = 0; + break; + case 0x0B: // VROUNDSD Gx, Vx, Ex, u8 + nextop = F8; + GETEX(1); + GETGX; GETVX; GETGY; + tmp8u = F8; // ignoring bit 3 interupt thingy + if(tmp8u&4) + tmp8u = emu->mxcsr.f.MXCSR_RC; + else + tmp8u &= 3; + switch(tmp8u) { + case ROUND_Nearest: { + int round = fegetround(); + fesetround(FE_TONEAREST); + GX->d[0] = nearbyint(EX->d[0]); + fesetround(round); + break; + } + case ROUND_Down: + GX->d[0] = floor(EX->d[0]); + break; + case ROUND_Up: + GX->d[0] = ceil(EX->d[0]); + break; + case ROUND_Chop: + GX->d[0] = trunc(EX->d[0]); + break; + } + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; case 0x0C: /* VBLENDPS Gx, Vx, Ex, u8 */ nextop = F8; GETEX(1); diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c index c0eff9d0..3da16c92 100644 --- a/src/emu/x64runavxf20f.c +++ b/src/emu/x64runavxf20f.c @@ -183,6 +183,18 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x51: /* VSQRTSD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + if(EX->d[0]<0.0 ) + GX->d[0] = -NAN; + else + GX->d[0] = sqrt(EX->d[0]); + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; + case 0x58: /* VADDSD Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -190,9 +202,7 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GETVX; GETGY; GX->d[0] = VX->d[0] + EX->d[0]; - if(GX!=VX) { - GX->q[1] = VX->q[1]; - } + GX->q[1] = VX->q[1]; GY->u128 = 0; break; case 0x59: /* VMULSD Gx, Vx, Ex */ @@ -225,6 +235,16 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x5C: /* VSUBSD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GX->d[0] = VX->d[0] - EX->d[0]; + GX->q[1] = VX->q[1]; + GY->u128 = 0; + break; case 0x5D: /* VMINSD Gx, Vx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c index 541fca9e..511463fd 100644 --- a/src/emu/x64runavxf30f.c +++ b/src/emu/x64runavxf30f.c @@ -76,7 +76,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GX->ud[1] = GX->ud[2] = GX->ud[3] = 0; } GETGY; - GY->q[0] = GY->q[1] = 0; + GY->u128 = 0; break; case 0x11: /* MOVSS Ex Gx */ nextop = F8; @@ -209,6 +209,43 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x51: /* VSQRTSS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + if(EX->f[0]<0.0 ) + GX->f[0] = -NAN; + else + GX->f[0] = sqrt(EX->f[0]); + if(GX!=VX) { + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + } + GY->u128 = 0; + break; + case 0x52: /* VRSQRTSS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + GX->f[0] = 1.0f/sqrtf(EX->f[0]); + if(GX!=VX) { + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + } + GY->u128 = 0; + break; + case 0x53: /* VRCPSS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; GETVX; GETGY; + GX->f[0] = 1.0f/EX->f[0]; + if(GX!=VX) { + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + } + GY->u128 = 0; + break; + case 0x58: /* VADDSS Gx, Vx, Ex */ nextop = F8; GETEX(0); @@ -220,7 +257,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GX->ud[1] = VX->ud[1]; GX->q[1] = VX->q[1]; } - GY->q[0] = GY->q[1] = 0; + GY->u128 = 0; break; case 0x59: /* VMULSS Gx, Vx, Ex */ nextop = F8; @@ -233,7 +270,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GX->ud[1] = VX->ud[1]; GX->q[1] = VX->q[1]; } - GY->q[0] = GY->q[1] = 0; + GY->u128 = 0; break; case 0x5A: /* VCVTSS2SD Gx, Vx, Ex */ nextop = F8; @@ -243,7 +280,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GETGY; GX->d[0] = EX->f[0]; GX->q[1] = VX->q[1]; - GY->q[0] = GY->q[1] = 0; + GY->u128 = 0; break; case 0x5B: /* VCVTTPS2DQ Gx, Ex */ nextop = F8; @@ -351,7 +388,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GETEY; memcpy(GY, EY, 16); } else - GY->q[0] = GY->q[1] = 0; + GY->u128 = 0; break; case 0x70: /* VPSHUFHW Gx, Ex, Ib */ nextop = F8; |