diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-28 19:55:15 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-28 19:55:15 +0200 |
| commit | 503eb41939a915454aa5ca95626560ece19aab30 (patch) | |
| tree | 550e6dfd037930eb969ab94e1e65296c00f9dfe5 /src | |
| parent | a77db3c6a5d35140fbfb9ad717b4b36ba30f2972 (diff) | |
| download | box64-503eb41939a915454aa5ca95626560ece19aab30.tar.gz box64-503eb41939a915454aa5ca95626560ece19aab30.zip | |
[INTERPRETER] more and more avx/avx2 opcodes
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/emu/x64runavx0f.c | 16 |
| -rw-r--r-- | src/emu/x64runavx660f.c | 144 |
| -rw-r--r-- | src/emu/x64runavx660f38.c | 84 |
| -rw-r--r-- | src/emu/x64runavx660f3a.c | 55 |
| -rw-r--r-- | src/emu/x64runavxf20f.c | 19 |
| -rw-r--r-- | src/emu/x64runavxf30f.c | 14 |
6 files changed, 321 insertions, 11 deletions
diff --git a/src/emu/x64runavx0f.c b/src/emu/x64runavx0f.c index 1644836a..8283cb72 100644 --- a/src/emu/x64runavx0f.c +++ b/src/emu/x64runavx0f.c @@ -293,7 +293,21 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; } break; - + case 0x56: /* VORPS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GX->u128 = VX->u128 | EX->u128; + GETGY; + if(vex.l) { + GETEY; + GETVY; + GY->u128 = VY->u128 | EY->u128; + } else { + GY->u128 = 0; + } + break; case 0x57: /* XORPS Gx, Vx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c index 1539fed6..6ae59163 100644 --- a/src/emu/x64runavx660f.c +++ b/src/emu/x64runavx660f.c @@ -214,6 +214,21 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; } break; + case 0x56: /* VORPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GX->u128 = VX->u128 | EX->u128; + GETGY; + if(vex.l) { + GETEY; + GETVY; + GY->u128 = VY->u128 | EY->u128; + } else { + GY->u128 = 0; + } + break; case 0x58: /* VADDPD Gx, Vx, Ex */ nextop = F8; @@ -232,7 +247,36 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; } break; - + case 0x59: /* MULPD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + for(int i=0; i<2; ++i) { + #ifndef NOALIGN + // mul generate a -NAN only if doing (+/-)inf * (+/-)0 + if((isinf(VX->d[i]) && EX->d[i]==0.0) || (isinf(EX->d[i]) && VX->d[i]==0.0)) + GX->d[i] = -NAN; + else + #endif + GX->d[i] = VX->d[i] * EX->d[i]; + } + if(vex.l) { + GETEY; + GETVY; + for(int i=0; i<2; ++i) { + #ifndef NOALIGN + // mul generate a -NAN only if doing (+/-)inf * (+/-)0 + if((isinf(VY->d[i]) && EY->d[i]==0.0) || (isinf(EY->d[i]) && VY->d[i]==0.0)) + GY->d[i] = -NAN; + else + #endif + GY->d[i] = VY->d[i] * EY->d[i]; + } + } else + GY->u128 = 0; + break; case 0x5A: /* VCVTPD2PS Gx, Ex */ nextop = F8; GETEX(0); @@ -392,6 +436,46 @@ uintptr_t RunAVX_660F(x64emu_t 
*emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = 0; break; + case 0x63: /* VPACKSSWB Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + GETVY; + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + } + if(VX==EX) { + for(int i=0; i<8; ++i) + GX->sb[i] = (EX->sw[i]<-128)?-128:((EX->sw[i]>127)?127:EX->sw[i]); + GX->q[1] = GX->q[0]; + } else { + for(int i=0; i<8; ++i) + GX->sb[i] = (VX->sw[i]<-128)?-128:((VX->sw[i]>127)?127:VX->sw[i]); + for(int i=0; i<8; ++i) + GX->sb[8+i] = (EX->sw[i]<-128)?-128:((EX->sw[i]>127)?127:EX->sw[i]); + } + if(vex.l) { + if(GY==EY) { + eay1 = *EY; + EY = &eay1; + } + if(VY==EY) { + for(int i=0; i<8; ++i) + GY->sb[i] = (EY->sw[i]<-128)?-128:((EY->sw[i]>127)?127:EY->sw[i]); + GY->q[1] = GY->q[0]; + } else { + for(int i=0; i<8; ++i) + GY->sb[i] = (VY->sw[i]<-128)?-128:((VY->sw[i]>127)?127:VY->sw[i]); + for(int i=0; i<8; ++i) + GY->sb[8+i] = (EY->sw[i]<-128)?-128:((EY->sw[i]>127)?127:EY->sw[i]); + } + } else + GY->u128 = 0; + break; case 0x64: /* VPCMPGTB Gx,Vx, Ex */ nextop = F8; GETEX(0); @@ -440,26 +524,74 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->q[0] = GY->q[1] = 0; break; + case 0x67: /* VPACKUSWB Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + GETVY; + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + } + if(VX==EX) { + for(int i=0; i<8; ++i) + GX->ub[i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]); + GX->q[1] = GX->q[0]; + } else { + for(int i=0; i<8; ++i) + GX->ub[i] = (VX->sw[i]<0)?0:((VX->sw[i]>0xff)?0xff:VX->sw[i]); + for(int i=0; i<8; ++i) + GX->ub[8+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]); + } + if(vex.l) { + if(GY==EY) { + eay1 = *EY; + EY = &eay1; + } + if(VY==EY) { + for(int i=0; i<8; ++i) + GY->ub[i] = (EY->sw[i]<0)?0:((EY->sw[i]>0xff)?0xff:EY->sw[i]); + GY->q[1] = GY->q[0]; + } else { + for(int i=0; i<8; ++i) + GY->ub[i] = (VY->sw[i]<0)?0:((VY->sw[i]>0xff)?0xff:VY->sw[i]); + for(int i=0; i<8; ++i) + 
GY->ub[8+i] = (EY->sw[i]<0)?0:((EY->sw[i]>0xff)?0xff:EY->sw[i]); + } + } else + GY->u128 = 0; + break; - case 0x6B: /* VPACKSSDW Gx,Vx, Ex */ + case 0x6B: /* VPACKSSDW Gx, Vx, Ex */ nextop = F8; GETEX(0); GETGX; GETVX; GETGY; + GETEY; + GETVY; + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + } for(int i=0; i<4; ++i) GX->sw[i] = (VX->sd[i]<-32768)?-32768:((VX->sd[i]>32767)?32767:VX->sd[i]); - if(GX==EX) + if(VX==EX) GX->q[1] = GX->q[0]; else for(int i=0; i<4; ++i) GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]); if(vex.l) { - GETEY; - GETVY; + if(GY==EY) { + eay1 = *EY; + EY = &eay1; + } for(int i=0; i<4; ++i) GY->sw[i] = (VY->sd[i]<-32768)?-32768:((VY->sd[i]>32767)?32767:VY->sd[i]); - if(GY==EY) + if(VY==EY) GY->q[1] = GY->q[0]; else for(int i=0; i<4; ++i) diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c index 6f46c659..c2ef2965 100644 --- a/src/emu/x64runavx660f38.c +++ b/src/emu/x64runavx660f38.c @@ -191,6 +191,55 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GY->u128 = EX->u128; break; + case 0x1C: /* PABSB Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETGY; + for (int i=0; i<16; ++i) { + GX->ub[i] = abs(EX->sb[i]); + } + if(vex.l) { + GETEY; + for (int i=0; i<16; ++i) { + GY->ub[i] = abs(EY->sb[i]); + } + } else + GY->u128 = 0; + break; + case 0x1D: /* PABSW Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETGY; + for (int i=0; i<8; ++i) { + GX->uw[i] = abs(EX->sw[i]); + } + if(vex.l) { + GETEY; + for (int i=0; i<8; ++i) { + GY->uw[i] = abs(EY->sw[i]); + } + } else + GY->u128 = 0; + break; + case 0x1E: /* PABSD Gx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETGY; + for (int i=0; i<4; ++i) { + GX->ud[i] = abs(EX->sd[i]); + } + if(vex.l) { + GETEY; + for (int i=0; i<4; ++i) { + GY->ud[i] = abs(EY->sd[i]); + } + } else + GY->u128 = 0; + break; + case 0x2A: /* VMOVNTDQA Gx, Ex */ nextop = F8; GETEX(0); @@ -205,7 +254,40 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t 
addr, int *step) } else GY->u128 = 0; break; - + case 0x2B: /* VPACKUSDW Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GETEY; + GETVY; + if(GX==EX) { + eax1 = *EX; + EX = &eax1; + } + for(int i=0; i<4; ++i) + GX->uw[i] = (VX->sd[i]<0)?0:((VX->sd[i]>65535)?65535:VX->sd[i]); + if(VX==EX) + GX->q[1] = GX->q[0]; + else + for(int i=0; i<4; ++i) + GX->uw[i+4] = (EX->sd[i]<0)?0:((EX->sd[i]>65535)?65535:EX->sd[i]); + if(vex.l) { + if(GY==EY) { + eay1 = *EY; + EY = &eay1; + } + for(int i=0; i<4; ++i) + GY->uw[i] = (VY->sd[i]<0)?0:((VY->sd[i]>65535)?65535:VY->sd[i]); + if(VY==EY) + GY->q[1] = GY->q[0]; + else + for(int i=0; i<4; ++i) + GY->uw[i+4] = (EY->sd[i]<0)?0:((EY->sd[i]>65535)?65535:EY->sd[i]); + } else + GY->u128 = 0; + break; case 0x2C: /*VMASKMOVPS Gx, Vx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c index 63125974..9016afa2 100644 --- a/src/emu/x64runavx660f3a.c +++ b/src/emu/x64runavx660f3a.c @@ -309,7 +309,60 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } else GY->u128 = 0; break; - + case 0x42: /* VMPSADBW Gx, Vx, Ex, Ib */ + nextop = F8; + GETEX(1); + GETGX; GETVX; GETGY; GETVY; GETEY; + if(GX==EX) { + eax1 = *EX; + EX=&eax1; + } + if(GX==VX) { + eay1 = *VX; + VX=&eay1; + } + tmp8u = F8; + { + int src = tmp8u&3; + int dst = (tmp8u>>2)&1; + int b[11]; + for (int i=0; i<11; ++i) + b[i] = VX->ub[dst*4+i]; + for(int i=0; i<8; ++i) { + int tmp = abs(b[i+0]-EX->ub[src*4+0]); + tmp += abs(b[i+1]-EX->ub[src*4+1]); + tmp += abs(b[i+2]-EX->ub[src*4+2]); + tmp += abs(b[i+3]-EX->ub[src*4+3]); + GX->uw[i] = tmp; + } + } + if(vex.l) { + if(GY==EY) { + eax1 = *EY; + EY=&eax1; + } + if(GY==VY) { + eay1 = *VY; + VY=&eay1; + } + { + int src = (tmp8u>>3)&3; + int dst = (tmp8u>>5)&1; + int b[11]; + for (int i=0; i<11; ++i) + b[i] = VY->ub[dst*4+i]; + for(int i=0; i<8; ++i) { + int tmp = abs(b[i+0]-EY->ub[src*4+0]); + tmp += abs(b[i+1]-EY->ub[src*4+1]); + tmp += 
abs(b[i+2]-EY->ub[src*4+2]); + tmp += abs(b[i+3]-EY->ub[src*4+3]); + GY->uw[i] = tmp; + } + } + } else + GY->u128 = 0; + break; + case 0x44: /* VPCLMULQDQ Gx, Vx, Ex, imm8 */ nextop = F8; GETGX; diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c index a3006a7d..6208f9f9 100644 --- a/src/emu/x64runavxf20f.c +++ b/src/emu/x64runavxf20f.c @@ -195,7 +195,24 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } GY->u128 = 0; break; - + case 0x59: /* VMULSD Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + #ifndef NOALIGN + // mul generate a -NAN only if doing (+/-)inf * (+/-)0 + if((isinf(GX->d[0]) && EX->d[0]==0.0) || (isinf(EX->d[0]) && GX->d[0]==0.0)) + GX->d[0] = -NAN; + else + #endif + GX->d[0] = VX->d[0] * EX->d[0]; + if(GX!=VX) { + GX->q[1] = VX->q[1]; + } + GY->u128 = 0; + break; case 0x5A: /* VCVTSD2SS Gx, Vx, Ex */ nextop = F8; GETEX(0); diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c index 8aa1506e..c8fd8b69 100644 --- a/src/emu/x64runavxf30f.c +++ b/src/emu/x64runavxf30f.c @@ -222,7 +222,19 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } GY->q[0] = GY->q[1] = 0; break; - + case 0x59: /* VMULSS Gx, Vx, Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + GX->f[0] = VX->f[0] * EX->f[0]; + if(GX!=VX) { + GX->ud[1] = VX->ud[1]; + GX->q[1] = VX->q[1]; + } + GY->q[0] = GY->q[1] = 0; + break; case 0x5A: /* VCVTSS2SD Gx, Vx, Ex */ nextop = F8; GETEX(0); |