about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author wannacu <76616478+wannacu@users.noreply.github.com> 2025-02-17 20:42:43 +0800
committer GitHub <noreply@github.com> 2025-02-17 13:42:43 +0100
commit f107321d0678f94893f22f83aee582d50bfd90a9 (patch)
tree b9cfb27d4c5223a65901f311a46a1245b024ebd0 /src
parent 64313d5aa6a0bafe84ddda61fe7cd541f81b42e7 (diff)
download box64-f107321d0678f94893f22f83aee582d50bfd90a9.tar.gz
download box64-f107321d0678f94893f22f83aee582d50bfd90a9.zip
[ARM64_DYNAREC] Added more AVX opcodes (#2372)
* [ARM64_DYNAREC] Fixed AVX.F2.0F 12 opcode

* [ARM64_DYNAREC] Added more AVX opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 13
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_67_avx.c 14
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c 60
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c 2
4 files changed, 84 insertions, 5 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index ec13f6aa..a032ce15 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -2816,7 +2816,18 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEM(d1, 0);

             URHADD_16(d0, d0, d1);

             break;

-

+        case 0xE4:

+            INST_NAME("PMULHUW Gm,Em");

+            nextop = F8;

+            GETGM(v0);

+            GETEM(v1, 0);

+            q0 = fpu_get_scratch(dyn, ninst);

+            q1 = fpu_get_scratch(dyn, ninst);

+            VUMULL_16(q0, v0, v1);

+            VUMULL2_16(q1, v0, v1);

+            UQSHRN_16(v0, q0, 16);

+            UQSHRN2_16(v0, q1, 16);

+            break;

         case 0xE5:

             INST_NAME("PMULHW Gm,Em");

             nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_67_avx.c b/src/dynarec/arm64/dynarec_arm64_67_avx.c
index 78ad685f..def36fb3 100644
--- a/src/dynarec/arm64/dynarec_arm64_67_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_67_avx.c
@@ -126,6 +126,20 @@ uintptr_t dynarec64_67_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 }
                 break;
 
+            case 0x6E:
+                INST_NAME("VMOVD Gx, Ed");
+                nextop = F8;
+                GETGX_empty(v0);
+                GETED(0);
+                VEORQ(v0, v0, v0); // RAZ vector
+                if(rex.w) {
+                    FMOVDx(v0, ed);
+                } else {
+                    FMOVSw(v0, ed);
+                }
+                YMM0(gd);
+                break;
+
             default:
                 DEFAULT;
         }
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 4f2b1c28..541855ff 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -99,7 +99,18 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             }
             if(!vex.l) YMM0(gd);
             break;
-
+        case 0x03:
+            INST_NAME("VPHADDSW Gx, Vx, Ex");
+            nextop = F8;
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                VUZP2Q_16(v0, q2, q1);
+                VUZP1Q_16(q0, q2, q1);
+                SQADDQ_16(q0, q0, v0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         case 0x04:
             INST_NAME("PMADDUBSW Gx, Vx, Ex");
             nextop = F8;
@@ -126,7 +137,42 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             }
             if(!vex.l) YMM0(gd); 
             break;
-
+        case 0x05:
+            INST_NAME("VPHSUBW Gx, Vx, Ex");
+            nextop = F8;
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                VUZP2Q_16(v0, q2, q1);
+                VUZP1Q_16(q0, q2, q1);
+                VSUBQ_16(q0, q0, v0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x06:
+            INST_NAME("VPHSUBD Gx, Vx, Ex");
+            nextop = F8;
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                VUZP2Q_32(v0, q2, q1);
+                VUZP1Q_32(q0, q2, q1);
+                VSUBQ_32(q0, q0, v0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x7:
+            INST_NAME("VPHSUBSW Gx, Vx, Ex");
+            nextop = F8;
+            v0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); }
+                VUZP2Q_16(v0, q2, q1);
+                VUZP1Q_16(q0, q2, q1);
+                SQSUBQ_16(q0, q0, v0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         case 0x08:
             INST_NAME("VPSIGNB Gx, Vx, Ex");
             nextop = F8;
@@ -599,7 +645,15 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             }
             if(!vex.l) YMM0(gd);
             break;
-
+        case 0x2A:
+            INST_NAME("VMOVNTDQA Gx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
+                VMOVQ(v0, v1);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         case 0x2B:
             INST_NAME("VPACKUSDW Gx, Ex, Vx");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index afc1ed55..1e204a13 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -113,7 +113,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(vex.l) {
                     GETGY_empty(v0, -1, -1, -1);
                     ADDx_U12(x3, ed, 16);
-                    VLDQ1R_64(v0, ed);
+                    VLDQ1R_64(v0, x3);
                 }
             }
             if(!vex.l) YMM0(gd);