about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-06-02 11:26:03 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-06-02 11:26:16 +0200
commit b49273c079dd9fa02ba1ee709e6d4edb1e728f71 (patch)
tree b078d68ab284c3f15547261a3cb9c5729568305b /src
parent 7c1b161277681aa875c792613b1bb50451df8133 (diff)
download box64-b49273c079dd9fa02ba1ee709e6d4edb1e728f71.tar.gz
box64-b49273c079dd9fa02ba1ee709e6d4edb1e728f71.zip
[INTERPRETER] Added FMA cpu extension (linked to BOX64_AVX=2)
Diffstat (limited to 'src')
-rw-r--r-- src/emu/x64runavx660f38.c 447
-rw-r--r-- src/tools/my_cpuid.c 2
-rw-r--r-- src/wrapped/wrappedlibc.c 2
3 files changed, 449 insertions, 2 deletions
diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c
index 535371f2..6a2dec79 100644
--- a/src/emu/x64runavx660f38.c
+++ b/src/emu/x64runavx660f38.c
@@ -1491,6 +1491,453 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             break;
 
+        case 0x98:  /* VFMADD132PS/D Gx, Vx, Ex: Gx = Gx*Ex + Vx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = GX->d[i]*EX->d[i] + VX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = GX->f[i]*EX->f[i] + VX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = GY->d[i]*EY->d[i] + VY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = GY->f[i]*EY->f[i] + VY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0x99:  /* VFMADD132SS/D Gx, Vx, Ex: lane 0 only, Gx = Gx*Ex + Vx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = GX->d[0]*EX->d[0] + VX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = GX->f[0]*EX->f[0] + VX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0x9A:  /* VFMSUB132PS/D Gx, Vx, Ex: Gx = Gx*Ex - Vx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = GX->d[i]*EX->d[i] - VX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = GX->f[i]*EX->f[i] - VX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = GY->d[i]*EY->d[i] - VY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = GY->f[i]*EY->f[i] - VY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0x9B:  /* VFMSUB132SS/D Gx, Vx, Ex: lane 0 only, Gx = Gx*Ex - Vx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = GX->d[0]*EX->d[0] - VX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = GX->f[0]*EX->f[0] - VX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0x9C:  /* VFNMADD132PS/D Gx, Vx, Ex: Gx = -Gx*Ex + Vx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = -GX->d[i]*EX->d[i] + VX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = -GX->f[i]*EX->f[i] + VX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = -GY->d[i]*EY->d[i] + VY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = -GY->f[i]*EY->f[i] + VY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0x9D:  /* VFNMADD132SS/D Gx, Vx, Ex: lane 0 only, Gx = -Gx*Ex + Vx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = -GX->d[0]*EX->d[0] + VX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = -GX->f[0]*EX->f[0] + VX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0x9E:  /* VFNMSUB132PS/D Gx, Vx, Ex: Gx = -Gx*Ex - Vx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = -GX->d[i]*EX->d[i] - VX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = -GX->f[i]*EX->f[i] - VX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = -GY->d[i]*EY->d[i] - VY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = -GY->f[i]*EY->f[i] - VY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0x9F:  /* VFNMSUB132SS/D Gx, Vx, Ex: lane 0 only, Gx = -Gx*Ex - Vx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = -GX->d[0]*EX->d[0] - VX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = -GX->f[0]*EX->f[0] - VX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+
+        case 0xA8:  /* VFMADD213PS/D Gx, Vx, Ex: Gx = Vx*Gx + Ex on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = VX->d[i]*GX->d[i] + EX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = VX->f[i]*GX->f[i] + EX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = VY->d[i]*GY->d[i] + EY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = VY->f[i]*GY->f[i] + EY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xA9:  /* VFMADD213SS/D Gx, Vx, Ex: lane 0 only, Gx = Vx*Gx + Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = VX->d[0]*GX->d[0] + EX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = VX->f[0]*GX->f[0] + EX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0xAA:  /* VFMSUB213PS/D Gx, Vx, Ex: Gx = Vx*Gx - Ex on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = VX->d[i]*GX->d[i] - EX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = VX->f[i]*GX->f[i] - EX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = VY->d[i]*GY->d[i] - EY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = VY->f[i]*GY->f[i] - EY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xAB:  /* VFMSUB213SS/D Gx, Vx, Ex: lane 0 only, Gx = Vx*Gx - Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = VX->d[0]*GX->d[0] - EX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = VX->f[0]*GX->f[0] - EX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0xAC:  /* VFNMADD213PS/D Gx, Vx, Ex: Gx = -Vx*Gx + Ex on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = -VX->d[i]*GX->d[i] + EX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = -VX->f[i]*GX->f[i] + EX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = -VY->d[i]*GY->d[i] + EY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = -VY->f[i]*GY->f[i] + EY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xAD:  /* VFNMADD213SS/D Gx, Vx, Ex: lane 0 only, Gx = -Vx*Gx + Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = -VX->d[0]*GX->d[0] + EX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = -VX->f[0]*GX->f[0] + EX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0xAE:  /* VFNMSUB213PS/D Gx, Vx, Ex: Gx = -Vx*Gx - Ex on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = -VX->d[i]*GX->d[i] - EX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = -VX->f[i]*GX->f[i] - EX->f[i];
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = -VY->d[i]*GY->d[i] - EY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = -VY->f[i]*GY->f[i] - EY->f[i];
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;        
+        case 0xAF:  /* VFNMSUB213SS/D Gx, Vx, Ex: lane 0 only, Gx = -Vx*Gx - Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = -VX->d[0]*GX->d[0] - EX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = -VX->f[0]*GX->f[0] - EX->f[0];
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+
+        case 0xB8:  /* VFMADD231PS/D Gx, Vx, Ex: Gx = Vx*Ex + Gx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = VX->d[i]*EX->d[i] + GX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = VX->f[i]*EX->f[i] + GX->f[i];   // addend is the float lane f[i]; d[i] would mix in a double lane
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = VY->d[i]*EY->d[i] + GY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = VY->f[i]*EY->f[i] + GY->f[i];   // float addend, same reason as above
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xB9:  /* VFMADD231SS/D Gx, Vx, Ex: lane 0 only, Gx = Vx*Ex + Gx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = VX->d[0]*EX->d[0] + GX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = VX->f[0]*EX->f[0] + GX->f[0];   // addend is the float f[0], not the double d[0]
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0xBA:  /* VFMSUB231PS/D Gx, Vx, Ex: Gx = Vx*Ex - Gx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = VX->d[i]*EX->d[i] - GX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = VX->f[i]*EX->f[i] - GX->f[i];   // subtrahend is the float lane f[i]; d[i] would mix in a double lane
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = VY->d[i]*EY->d[i] - GY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = VY->f[i]*EY->f[i] - GY->f[i];   // float subtrahend, same reason as above
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xBB:  /* VFMSUB231SS/D Gx, Vx, Ex: lane 0 only, Gx = Vx*Ex - Gx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = VX->d[0]*EX->d[0] - GX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = VX->f[0]*EX->f[0] - GX->f[0];   // subtrahend is the float f[0], not the double d[0]
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0xBC:  /* VFNMADD231PS/D Gx, Vx, Ex: Gx = -Vx*Ex + Gx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = -VX->d[i]*EX->d[i] + GX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = -VX->f[i]*EX->f[i] + GX->f[i];   // addend is the float lane f[i]; d[i] would mix in a double lane
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = -VY->d[i]*EY->d[i] + GY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = -VY->f[i]*EY->f[i] + GY->f[i];   // float addend, same reason as above
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xBD:  /* VFNMADD231SS/D Gx, Vx, Ex: lane 0 only, Gx = -Vx*Ex + Gx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = -VX->d[0]*EX->d[0] + GX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = -VX->f[0]*EX->f[0] + GX->f[0];   // addend is the float f[0], not the double d[0]
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+        case 0xBE:  /* VFNMSUB231PS/D Gx, Vx, Ex: Gx = -Vx*Ex - Gx on each packed lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: two double lanes
+                for(int i=0; i<2; ++i)
+                    GX->d[i] = -VX->d[i]*EX->d[i] - GX->d[i];
+            } else {        // rex.w clear: four float lanes
+                for(int i=0; i<4; ++i)
+                    GX->f[i] = -VX->f[i]*EX->f[i] - GX->f[i];   // subtrahend is the float lane f[i]; d[i] would mix in a double lane
+            }
+            if(vex.l) {     // vex.l set: repeat the operation on the GY/EY/VY operands
+                GETEY; GETVY;
+                if(rex.w) {
+                    for(int i=0; i<2; ++i)
+                        GY->d[i] = -VY->d[i]*EY->d[i] - GY->d[i];
+                } else {
+                    for(int i=0; i<4; ++i)
+                        GY->f[i] = -VY->f[i]*EY->f[i] - GY->f[i];   // float subtrahend, same reason as above
+                }
+            } else GY->u128 = 0;    // vex.l clear: GY is zeroed
+            break;
+        case 0xBF:  /* VFNMSUB231SS/D Gx, Vx, Ex: lane 0 only, Gx = -Vx*Ex - Gx */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            if(rex.w) {     // rex.w set: double in d[0]
+                GX->d[0] = -VX->d[0]*EX->d[0] - GX->d[0];
+            } else {        // rex.w clear: float in f[0]
+                GX->f[0] = -VX->f[0]*EX->f[0] - GX->f[0];   // subtrahend is the float f[0], not the double d[0]
+            }
+            GY->u128 = 0;   // scalar form: GY is always zeroed, other GX lanes are not written
+            break;
+
         case 0xDB:  /* VAESIMC Gx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index a42915fd..23f3d19b 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -254,7 +254,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
             R_ECX =   1<<0      // SSE3
                     | 1<<1      // PCLMULQDQ
                     | 1<<9      // SSSE3
-                    //| 1<<12     // fma    // some games treat FMA as AVX
+                    | box64_avx2<<12     // fma
                     | 1<<13     // cx16 (cmpxchg16)
                     | 1<<19     // SSE4_1
                     | box64_sse42<<20     // SSE4_2 can be hiden
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index 6b5312b3..299c0f58 100644
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1639,7 +1639,7 @@ void CreateCPUInfoFile(int fd)
         P;
         sprintf(buff, "bogomips\t: %g\n", getBogoMips());
         P;
-        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt%s%s%s%s\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"", box64_avx?" bmi1":"", box64_avx2?" avx2":"", box64_avx?" bmi2":"", box64_avx2?" vaes":"");
+        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt%s%s%s%s%s\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"", box64_avx?" bmi1":"", box64_avx2?" avx2":"", box64_avx?" bmi2":"", box64_avx2?" vaes":"", box64_avx2?" fma":"");
         P;
         sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n");
         P;