diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-02 11:26:03 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-02 11:26:16 +0200 |
| commit | b49273c079dd9fa02ba1ee709e6d4edb1e728f71 (patch) | |
| tree | b078d68ab284c3f15547261a3cb9c5729568305b /src | |
| parent | 7c1b161277681aa875c792613b1bb50451df8133 (diff) | |
| download | box64-b49273c079dd9fa02ba1ee709e6d4edb1e728f71.tar.gz box64-b49273c079dd9fa02ba1ee709e6d4edb1e728f71.zip | |
[INTERPRETER] Added FMA cpu extension (linked to BOX64_AVX=2)
Diffstat (limited to 'src')
| -rw-r--r-- | src/emu/x64runavx660f38.c | 447 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 2 | ||||
| -rw-r--r-- | src/wrapped/wrappedlibc.c | 2 |
3 files changed, 449 insertions, 2 deletions
diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c index 535371f2..6a2dec79 100644 --- a/src/emu/x64runavx660f38.c +++ b/src/emu/x64runavx660f38.c @@ -1491,6 +1491,453 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) } break; + case 0x98: /* VFMADD132PS/D Gx, Vx, Ex : Gx = Gx*Ex + Vx on each lane (rex.w: 2 doubles, else 4 floats); GY gets the same treatment when vex.l, else is cleared */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = GX->d[i]*EX->d[i] + VX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = GX->f[i]*EX->f[i] + VX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = GY->d[i]*EY->d[i] + VY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = GY->f[i]*EY->f[i] + VY->f[i]; + } + } else GY->u128 = 0; + break; + case 0x99: /* VFMADD132SS/D Gx, Vx, Ex : lane 0 only, Gx = Gx*Ex + Vx; GY is always cleared */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = GX->d[0]*EX->d[0] + VX->d[0]; + } else { + GX->f[0] = GX->f[0]*EX->f[0] + VX->f[0]; + } + GY->u128 = 0; + break; + case 0x9A: /* VFMSUB132PS/D Gx, Vx, Ex : Gx = Gx*Ex - Vx on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = GX->d[i]*EX->d[i] - VX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = GX->f[i]*EX->f[i] - VX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = GY->d[i]*EY->d[i] - VY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = GY->f[i]*EY->f[i] - VY->f[i]; + } + } else GY->u128 = 0; + break; + case 0x9B: /* VFMSUB132SS/D Gx, Vx, Ex : lane 0 only, Gx = Gx*Ex - Vx */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = GX->d[0]*EX->d[0] - VX->d[0]; + } else { + GX->f[0] = GX->f[0]*EX->f[0] - VX->f[0]; + } + GY->u128 = 0; + break; + case 0x9C: /* VFNMADD132PS/D Gx, Vx, Ex : Gx = -Gx*Ex + Vx on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = -GX->d[i]*EX->d[i] + VX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = -GX->f[i]*EX->f[i] + VX->f[i]; + } + 
if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = -GY->d[i]*EY->d[i] + VY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = -GY->f[i]*EY->f[i] + VY->f[i]; + } + } else GY->u128 = 0; + break; + case 0x9D: /* VFNMADD132SS/D Gx, Vx, Ex : lane 0 only, Gx = -Gx*Ex + Vx */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = -GX->d[0]*EX->d[0] + VX->d[0]; + } else { + GX->f[0] = -GX->f[0]*EX->f[0] + VX->f[0]; + } + GY->u128 = 0; + break; + case 0x9E: /* VFNMSUB132PS/D Gx, Vx, Ex : Gx = -Gx*Ex - Vx on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = -GX->d[i]*EX->d[i] - VX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = -GX->f[i]*EX->f[i] - VX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = -GY->d[i]*EY->d[i] - VY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = -GY->f[i]*EY->f[i] - VY->f[i]; + } + } else GY->u128 = 0; + break; + case 0x9F: /* VFNMSUB132SS/D Gx, Vx, Ex : lane 0 only, Gx = -Gx*Ex - Vx */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = -GX->d[0]*EX->d[0] - VX->d[0]; + } else { + GX->f[0] = -GX->f[0]*EX->f[0] - VX->f[0]; + } + GY->u128 = 0; + break; + + case 0xA8: /* VFMADD213PS/D Gx, Vx, Ex : Gx = Vx*Gx + Ex on each lane (rex.w: 2 doubles, else 4 floats) */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = VX->d[i]*GX->d[i] + EX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = VX->f[i]*GX->f[i] + EX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = VY->d[i]*GY->d[i] + EY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = VY->f[i]*GY->f[i] + EY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xA9: /* VFMADD213SS/D Gx, Vx, Ex : lane 0 only, Gx = Vx*Gx + Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = VX->d[0]*GX->d[0] + EX->d[0]; + } else { + GX->f[0] = VX->f[0]*GX->f[0] + EX->f[0]; + } + GY->u128 = 0; + break; + case 0xAA: /* VFMSUB213PS/D Gx, Vx, Ex : Gx = Vx*Gx - Ex on each lane */ + nextop = F8; + 
GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = VX->d[i]*GX->d[i] - EX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = VX->f[i]*GX->f[i] - EX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = VY->d[i]*GY->d[i] - EY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = VY->f[i]*GY->f[i] - EY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xAB: /* VFMSUB213SS/D Gx, Vx, Ex : lane 0 only, Gx = Vx*Gx - Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = VX->d[0]*GX->d[0] - EX->d[0]; + } else { + GX->f[0] = VX->f[0]*GX->f[0] - EX->f[0]; + } + GY->u128 = 0; + break; + case 0xAC: /* VFNMADD213PS/D Gx, Vx, Ex : Gx = -Vx*Gx + Ex on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = -VX->d[i]*GX->d[i] + EX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = -VX->f[i]*GX->f[i] + EX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = -VY->d[i]*GY->d[i] + EY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = -VY->f[i]*GY->f[i] + EY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xAD: /* VFNMADD213SS/D Gx, Vx, Ex : lane 0 only, Gx = -Vx*Gx + Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = -VX->d[0]*GX->d[0] + EX->d[0]; + } else { + GX->f[0] = -VX->f[0]*GX->f[0] + EX->f[0]; + } + GY->u128 = 0; + break; + case 0xAE: /* VFNMSUB213PS/D Gx, Vx, Ex : Gx = -Vx*Gx - Ex on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = -VX->d[i]*GX->d[i] - EX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = -VX->f[i]*GX->f[i] - EX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = -VY->d[i]*GY->d[i] - EY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = -VY->f[i]*GY->f[i] - EY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xAF: /* VFNMSUB213SS/D Gx, Vx, Ex : lane 0 only, Gx = -Vx*Gx - Ex */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + 
if(rex.w) { + GX->d[0] = -VX->d[0]*GX->d[0] - EX->d[0]; + } else { + GX->f[0] = -VX->f[0]*GX->f[0] - EX->f[0]; + } + GY->u128 = 0; + break; + + case 0xB8: /* VFMADD231PS/D Gx, Vx, Ex : Gx = Vx*Ex + Gx on each lane (rex.w: 2 doubles via d[], else 4 floats via f[] for every operand, addend included) */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = VX->d[i]*EX->d[i] + GX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = VX->f[i]*EX->f[i] + GX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = VY->d[i]*EY->d[i] + GY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = VY->f[i]*EY->f[i] + GY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xB9: /* VFMADD231SS/D Gx, Vx, Ex : lane 0 only, Gx = Vx*Ex + Gx; GY is always cleared */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = VX->d[0]*EX->d[0] + GX->d[0]; + } else { + GX->f[0] = VX->f[0]*EX->f[0] + GX->f[0]; + } + GY->u128 = 0; + break; + case 0xBA: /* VFMSUB231PS/D Gx, Vx, Ex : Gx = Vx*Ex - Gx on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = VX->d[i]*EX->d[i] - GX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = VX->f[i]*EX->f[i] - GX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = VY->d[i]*EY->d[i] - GY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = VY->f[i]*EY->f[i] - GY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xBB: /* VFMSUB231SS/D Gx, Vx, Ex : lane 0 only, Gx = Vx*Ex - Gx */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = VX->d[0]*EX->d[0] - GX->d[0]; + } else { + GX->f[0] = VX->f[0]*EX->f[0] - GX->f[0]; + } + GY->u128 = 0; + break; + case 0xBC: /* VFNMADD231PS/D Gx, Vx, Ex : Gx = -Vx*Ex + Gx on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = -VX->d[i]*EX->d[i] + GX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = -VX->f[i]*EX->f[i] + GX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = -VY->d[i]*EY->d[i] + GY->d[i]; + } else { + for(int 
i=0; i<4; ++i) + GY->f[i] = -VY->f[i]*EY->f[i] + GY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xBD: /* VFNMADD231SS/D Gx, Vx, Ex : lane 0 only, Gx = -Vx*Ex + Gx */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = -VX->d[0]*EX->d[0] + GX->d[0]; + } else { + GX->f[0] = -VX->f[0]*EX->f[0] + GX->f[0]; + } + GY->u128 = 0; + break; + case 0xBE: /* VFNMSUB231PS/D Gx, Vx, Ex : Gx = -Vx*Ex - Gx on each lane */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + for(int i=0; i<2; ++i) + GX->d[i] = -VX->d[i]*EX->d[i] - GX->d[i]; + } else { + for(int i=0; i<4; ++i) + GX->f[i] = -VX->f[i]*EX->f[i] - GX->f[i]; + } + if(vex.l) { + GETEY; GETVY; + if(rex.w) { + for(int i=0; i<2; ++i) + GY->d[i] = -VY->d[i]*EY->d[i] - GY->d[i]; + } else { + for(int i=0; i<4; ++i) + GY->f[i] = -VY->f[i]*EY->f[i] - GY->f[i]; + } + } else GY->u128 = 0; + break; + case 0xBF: /* VFNMSUB231SS/D Gx, Vx, Ex : lane 0 only, Gx = -Vx*Ex - Gx */ + nextop = F8; + GETEX(0); + GETGX; + GETVX; + GETGY; + if(rex.w) { + GX->d[0] = -VX->d[0]*EX->d[0] - GX->d[0]; + } else { + GX->f[0] = -VX->f[0]*EX->f[0] - GX->f[0]; + } + GY->u128 = 0; + break; + case 0xDB: /* VAESIMC Gx, Ex */ nextop = F8; GETEX(0); diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c index a42915fd..23f3d19b 100644 --- a/src/tools/my_cpuid.c +++ b/src/tools/my_cpuid.c @@ -254,7 +254,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u) R_ECX = 1<<0 // SSE3 | 1<<1 // PCLMULQDQ | 1<<9 // SSSE3 - //| 1<<12 // fma // some games treat FMA as AVX + | box64_avx2<<12 // fma | 1<<13 // cx16 (cmpxchg16) | 1<<19 // SSE4_1 | box64_sse42<<20 // SSE4_2 can be hiden diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c index 6b5312b3..299c0f58 100644 --- a/src/wrapped/wrappedlibc.c +++ b/src/wrapped/wrappedlibc.c @@ -1639,7 +1639,7 @@ void CreateCPUInfoFile(int fd) P; sprintf(buff, "bogomips\t: %g\n", getBogoMips()); P; - sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt 
popcnt%s%s%s%s\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"", box64_avx?" bmi1":"", box64_avx2?" avx2":"", box64_avx?" bmi2":"", box64_avx2?" vaes":""); + sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt%s%s%s%s%s\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"", box64_avx?" bmi1":"", box64_avx2?" avx2":"", box64_avx?" bmi2":"", box64_avx2?" vaes":"", box64_avx2?" fma":""); P; sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n"); P; |