about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-06-06 13:47:47 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-06-06 13:47:47 +0200
commit 0cedc70eb36e0c136bfa90be1e275c8792052195 (patch)
tree 266012e48b5960bd8b3b21e88b705d587b05530a /src
parent 1612687acfd6200515fc38fd46180f09f5a41661 (diff)
download box64-0cedc70eb36e0c136bfa90be1e275c8792052195.tar.gz
box64-0cedc70eb36e0c136bfa90be1e275c8792052195.zip
[ARM64_DYNAREC] Added a new small batch of AVX/BMI2 opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/core.c 5
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx.c 2
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_0f38.c 100
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c 49
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c 18
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f20f.c 14
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f30f.c 23
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 2
-rw-r--r-- src/include/debug.h 1
-rw-r--r-- src/tools/my_cpuid.c 3
10 files changed, 207 insertions, 10 deletions
diff --git a/src/core.c b/src/core.c
index 01e40fd9..6391b302 100644
--- a/src/core.c
+++ b/src/core.c
@@ -92,6 +92,7 @@ int arm64_aes = 0;
 int arm64_pmull = 0;
 int arm64_crc32 = 0;
 int arm64_atomics = 0;
+int arm64_asimdhp = 0;
 int arm64_sha1 = 0;
 int arm64_sha2 = 0;
 int arm64_uscat = 0;
@@ -404,6 +405,8 @@ HWCAP2_ECV
     // ATOMIC use are disable for now. They crashes Batman Arkham Knight, bossibly other (also seems to make steamwebhelper unstable)
     if(hwcap&HWCAP_ATOMICS)
         arm64_atomics = 1;
+    if(hwcap&HWCAP_ASIMDHP)
+        arm64_asimdhp = 1;
     #ifdef HWCAP_SHA1
     if(hwcap&HWCAP_SHA1)
         arm64_sha1 = 1;
@@ -442,6 +445,8 @@ HWCAP2_ECV
         printf_log(LOG_INFO, " PMULL");
     if(arm64_atomics)
         printf_log(LOG_INFO, " ATOMICS");
+    if(arm64_asimdhp)
+        printf_log(LOG_INFO, " ASIMDHP");
     if(arm64_sha1)
         printf_log(LOG_INFO, " SHA1");
     if(arm64_sha2)
diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c
index 4a89afd1..ec00dddd 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx.c
@@ -51,6 +51,8 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE))
         addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_NONE))
+        addr = dynarec64_AVX_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_F2))
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c
new file mode 100644
index 00000000..8894405d
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+/*
+ * Dynarec pass for VEX opcodes in the 0F38 map without a mandatory prefix
+ * (dynarec64_AVX dispatches here for VEX_M_0F38 with VEX_P_NONE).
+ * Fetches the opcode with F8 and handles BZHI (0xF5); every other opcode
+ * takes the DEFAULT path. ip and need_epilog are unused.
+ * Returns addr as left by the fetch/decode macros.
+ */
+uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    // not every local is used by the single opcode handled so far
+    MAYUSE(wb1); MAYUSE(wb2); MAYUSE(eb1); MAYUSE(eb2);
+    MAYUSE(gb1); MAYUSE(gb2); MAYUSE(q0); MAYUSE(q1);
+    MAYUSE(d0); MAYUSE(d1); MAYUSE(s0);
+    MAYUSE(j64); MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0xF5:
+            INST_NAME("BZHI Gd, Ed, Vd");
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET);
+            GETGD;
+            GETED(0);
+            GETVD;
+            // NOTE(review): x1 is reused below; confirm GETED(0) never leaves ed in x1
+            UXTBw(x1, vd);                  // bit index = low byte of Vd
+            CMPSw_U12(x1, rex.w?64:32);
+            CSETMxw(x2, cPL);               // all ones when index >= operand size, else 0
+            IFX(X_CF) {                     // IFX takes an X_* mask; F_CF is only the bit position
+                BFIw(xFlags, x2, F_CF, 1);  // CF = 1 when the index is out of range
+            }
+            MVNxw_REG(x2, x2); //prepare mask: 0 when out of range (Gd = Ed), else all ones
+            B_MARK(cPL);                    // out of range: keep the empty mask, skip the shift
+            LSLxw_REG(x2, x2, x1);          // mask = every bit from index upward
+            MARK;
+            IFX(X_ZF) {
+                BICSxw(gd, ed, x2);         // Gd = Ed with the masked bits cleared, flags set
+                CSETw(x3, cEQ);
+                BFIw(xFlags, x3, F_ZF, 1);
+            } else
+                BICxw(gd, ed, x2);
+            IFX(X_SF) {
+                LSRxw(x3, gd, rex.w?63:31); // top bit of the result
+                BFIw(xFlags, x3, F_SF, 1);
+            }
+            IFX(X_AF) BFCw(xFlags, F_AF, 1);
+            IFX(X_PF) BFCw(xFlags, F_PF, 1);
+            IFX(X_OF) BFCw(xFlags, F_OF, 1);
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 783b77c2..a2a45435 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -840,6 +840,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             YMM0(gd);
             break;
 
+        case 0x9D:
+            INST_NAME("VFNMADD132SS/D Gx, Vx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETVX(v2, 0);
+            if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);}
+            q0 = fpu_get_scratch(dyn, ninst);
+            if(rex.w) {
+                FMSUB_64(q0, v2, v1, v0);
+                VMOVeD(v0, 0, q0, 0);
+            } else {
+                FMSUB_32(q0, v2, v1, v0);
+                VMOVeS(v0, 0, q0, 0);
+            }
+            YMM0(gd);
+            break;
+
         case 0xA8:
             INST_NAME("VFMADD213PS/D Gx, Vx, Ex");
             nextop = F8;
@@ -886,7 +903,39 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             }
             if(!vex.l) YMM0(gd);
             break;
+        case 0xAB:
+            INST_NAME("VFMSUB213SS/D Gx, Vx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETVX(v2, 0);
+            if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);}
+            q0 = fpu_get_scratch(dyn, ninst);
+            if(rex.w) {
+                FNMSUB_64(q0, v1, v0, v2);
+                VMOVeD(v0, 0, q0, 0);
+            } else {
+                FNMSUB_32(q0, v1, v0, v2);
+                VMOVeS(v0, 0, q0, 0);
+            }
+            YMM0(gd);
+            break;
 
+        case 0xAD:
+            INST_NAME("VFMNADD213SS/D Gx, Vx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETVX(v2, 0);
+            if(rex.w) {GETEXSD(v1, 0, 0);} else {GETEXSS(v1, 0, 0);}
+            q0 = fpu_get_scratch(dyn, ninst);
+            if(rex.w) {
+                FMSUB_64(q0, v1, v0, v2);
+                VMOVeD(v0, 0, q0, 0);
+            } else {
+                FMSUB_32(q0, v1, v0, v2);
+                VMOVeS(v0, 0, q0, 0);
+            }
+            YMM0(gd);
+            break;
         case 0xAE:
             INST_NAME("VFNMSUB213PS/D Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
index 8e2ed65c..08a04825 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
@@ -448,15 +448,15 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             GETEXSS(v1, 0, 1);
             GETGX_empty_VX(v0, v2);
             u8 = F8;
-            if(((u8&15)!=12) && ((u8&15)!=15)) {
-                if(((u8&15)==12)||((u8&15)==13)||((u8&15)==9)||((u8&15)==10))
+            if(((u8&15)!=11) && ((u8&15)!=15)) {
+                if((u8&15)>7)
                     FCMPS(v1, v2);
                 else
                     FCMPS(v2, v1);
             }
             // TODO: create a test for this one, there might be an issue with cases 9, 10 and 13
             if(v0!=v2) VMOVQ(v0, v2);
-            switch(u8&7) {
+            switch(u8&15) {
                 case 0x00: CSETMw(x2, cEQ); break;  // Equal
                 case 0x01: CSETMw(x2, cCC); break;  // Less than
                 case 0x02: CSETMw(x2, cLS); break;  // Less or equal
@@ -465,13 +465,13 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 case 0x05: CSETMw(x2, cCS); break;  // Greater or equal or unordered
                 case 0x06: CSETMw(x2, cHI); break;  // Greater or unordered
                 case 0x07: CSETMw(x2, cVC); break;  // not NaN
-                case 0x08: CSETMw(x2, cEQ); CSETMw(x3, cVS); ORRw_REG(x2, x2, x3); break;  // Equal than or ordered
-                case 0x09: CSETMw(x2, cCS); break;  // Less than or unordered
-                case 0x0a: CSETMw(x2, cHI); break;  // Less or equal or unordered
+                case 0x08: CSETMw(x2, cEQ); CSETMw(x3, cVS); ORRw_REG(x2, x2, x3); break;  // Equal or unordered
+                case 0x09: CSETMw(x2, cHI); break;  // Less than or unordered
+                case 0x0a: CSETMw(x2, cCS); break;  // Less or equal or unordered
                 case 0x0b: MOV32w(x2, 0); break;    // false
-                case 0x0c: CSETMw(x2, cNE); CSETMw(x3, cVC); ANDw_REG(x2, x2, x3); break;  // Not Equal not unordered
-                case 0x0d: CSETMw(x2, cCC); break;  // Greater or equal not unordered
-                case 0x0e: CSETMw(x2, cLS); break;  // Greater not unordered
+                case 0x0c: CSETMw(x2, cNE); CSETMw(x3, cVS); BICw(x2, x2, x3); break;  // Not Equal not unordered
+                case 0x0d: CSETMw(x2, cLS); break;  // Greater or equal not unordered
+                case 0x0e: CSETMw(x2, cCC); break;  // Greater not unordered
                 case 0x0f: MOV32w(x2, 0xffffffff); break; // true
             }
             VMOVQSfrom(v0, 0, x2);
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index d7eefb17..8c0cb3b9 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -413,6 +413,20 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             VUZP2Q_32(v0, v0, v1);

             VFSUBQS(v0, d0, v0);

             break;

+

+        case 0xAE:

+            nextop = F8;

+            switch((nextop>>3)&7) {

+                case 6:

+                    INST_NAME("(unsupported) UWAIT Ed");

+                    FAKEED;

+                    UDF(0);

+                    break;

+                default:

+                    DEFAULT;

+            }

+            break;

+

         case 0xC2:

             INST_NAME("CMPSD Gx, Ex, Ib");

             nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index a3144f21..7787e4d8 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -410,6 +410,29 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }

             break;

 

+        case 0xAE:

+            nextop = F8;

+            switch((nextop>>3)&7) {

+                case 2:

+                    INST_NAME("(unsupported) WRFSBASE Ed");

+                    FAKEED;

+                    UDF(0);

+                    break;

+                case 3:

+                    INST_NAME("(unsupported) WRGSBASE Ed");

+                    FAKEED;

+                    UDF(0);

+                    break;

+                case 6:

+                    INST_NAME("(unsupported) UMONITOR Ed");

+                    FAKEED;

+                    UDF(0);

+                    break;

+                default:

+                    DEFAULT;

+            }

+            break;

+

         case 0xB8:

             INST_NAME("POPCNT Gd, Ed");

             SETFLAGS(X_ALL, SF_SET);

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index cb0e181c..34822661 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1181,6 +1181,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define dynarec64_F30F     STEPNAME(dynarec64_F30F)
 #define dynarec64_AVX      STEPNAME(dynarec64_AVX)
 #define dynarec64_AVX_0F   STEPNAME(dynarec64_AVX_0F)
+#define dynarec64_AVX_0F38 STEPNAME(dynarec64_AVX_0F38)
 #define dynarec64_AVX_66_0F     STEPNAME(dynarec64_AVX_66_0F)
 #define dynarec64_AVX_F2_0F     STEPNAME(dynarec64_AVX_F2_0F)
 #define dynarec64_AVX_F3_0F     STEPNAME(dynarec64_AVX_F3_0F)
@@ -1608,6 +1609,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
diff --git a/src/include/debug.h b/src/include/debug.h
index be48b59e..52429f47 100644
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -40,6 +40,7 @@ extern int arm64_aes;
 extern int arm64_pmull;
 extern int arm64_crc32;
 extern int arm64_atomics;
+extern int arm64_asimdhp;
 extern int arm64_sha1;
 extern int arm64_sha2;
 extern int arm64_uscat;
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 23f3d19b..0b71e4f7 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -264,6 +264,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<26     // xsave
                     | 1<<27     // osxsave
                     | box64_avx<<28 // AVX
+                    | box64_avx<<29 // F16C
                     ; 
             break;
         case 0x2:   // TLB and Cache info. Sending 1st gen P4 info...
@@ -325,7 +326,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                         box64_avx<<3 |  // BMI1 
                         box64_avx2<<5 |  //AVX2
                         box64_avx2<<8 | //BMI2
-                        box64_avx2<<9 | //VAES
+                        box64_avx<<9 | //VAES
                         box64_avx2<<19 | //ADX
                         1<<29|  // SHA extension
                         0;