about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2025-04-21 12:22:06 +0200
committer: ptitSeb <sebastien.chev@gmail.com> 2025-04-21 12:22:06 +0200
commit: 6f3f3e0e85bd55bae2ff2040e8e4eb921f8716dd (patch)
tree: fac61537d7657a6d8ec48fdcf949893f51104bb3 /src
parent: 2384462f61f5105921aa931855f028b1f3b5c4c6 (diff)
download: box64-6f3f3e0e85bd55bae2ff2040e8e4eb921f8716dd.tar.gz
box64-6f3f3e0e85bd55bae2ff2040e8e4eb921f8716dd.zip
[ARM64_DYNAREC] Add/Improved (V)H[ADD/SUB]P[S/D] opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 6
-rw-r--r-- src/dynarec/arm64/arm64_printer.c 8
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 17
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f.c 32
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c 5
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f20f.c 8
6 files changed, 53 insertions, 23 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 7d087fc5..1c8f0296 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1570,6 +1570,12 @@ int convert_bitmask(uint64_t bitmask);
 #define VFMAXQS(Vd, Vn, Vm)         EMIT(FMINMAX_vector(1, 0, 0, 0, Vm, Vn, Vd))
 #define VFMINQD(Vd, Vn, Vm)         EMIT(FMINMAX_vector(1, 0, 1, 1, Vm, Vn, Vd))
 #define VFMAXQD(Vd, Vn, Vm)         EMIT(FMINMAX_vector(1, 0, 0, 1, Vm, Vn, Vd))
+#define VFMINPS(Vd, Vn, Vm)         EMIT(FMINMAX_vector(0, 1, 1, 0, Vm, Vn, Vd))
+#define VFMAXPS(Vd, Vn, Vm)         EMIT(FMINMAX_vector(0, 1, 0, 0, Vm, Vn, Vd))
+#define VFMINPQS(Vd, Vn, Vm)        EMIT(FMINMAX_vector(1, 1, 1, 0, Vm, Vn, Vd))
+#define VFMAXPQS(Vd, Vn, Vm)        EMIT(FMINMAX_vector(1, 1, 0, 0, Vm, Vn, Vd))
+#define VFMINPQD(Vd, Vn, Vm)        EMIT(FMINMAX_vector(1, 1, 1, 1, Vm, Vn, Vd))
+#define VFMAXPQD(Vd, Vn, Vm)        EMIT(FMINMAX_vector(1, 1, 0, 1, Vm, Vn, Vd))
 
 #define FMINMAX_scalar(type, Rm, op, Rn, Rd)        (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b01<<14 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 #define FMINS(Sd, Sn, Sm)           EMIT(FMINMAX_scalar(0b00, Sm, 0b01, Sn, Sd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 766ac6ea..a0818a78 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1362,18 +1362,18 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "F%s %c%d, %c%d, %c%d", (option==3)?"MINNM":((option==2)?"MAXNM":((!option)?"MAX":"MIN")), s, Rd, s, Rn, s, Rm);

         return buff;

     }

-    if(isMask(opcode, "0Q001110of1mmmmm110001nnnnnddddd", &a)) {

+    if(isMask(opcode, "0QU01110of1mmmmm110001nnnnnddddd", &a)) {

         char s = (sf==0)?'S':((sf==1)?'D':'?');

         int n = (sf==0)?2:1;

         n *= a.Q?2:1;

-        snprintf(buff, sizeof(buff), "F%sNM%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);

+        snprintf(buff, sizeof(buff), "F%sNM%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.U?"P":"", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);

         return buff;

     }

-    if(isMask(opcode, "0Q001110of1mmmmm111101nnnnnddddd", &a)) {

+    if(isMask(opcode, "0QU01110of1mmmmm111101nnnnnddddd", &a)) {

         char s = (sf==0)?'S':((sf==1)?'D':'?');

         int n = (sf==0)?2:1;

         n *= a.Q?2:1;

-        snprintf(buff, sizeof(buff), "F%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);

+        snprintf(buff, sizeof(buff), "F%s%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.U?"P":"", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);

         return buff;

     }

     // FMADD

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 14540eef..81dad552 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2289,10 +2289,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v0 = fpu_get_scratch(dyn, ninst);

                 v1 = fpu_get_scratch(dyn, ninst);

                 // check if any input value was NAN

-                // but need to mix low/high part

-                VTRNQ1_64(v0, q1, q0);

-                VTRNQ2_64(v1, q1, q0);

-                VFMAXQD(v0, v0, v1);    // propagate NAN

+                VFMAXPQD(v0, q1, q0);    // propagate NAN

                 VFCMEQQD(v0, v0, v0);    // 0 if NAN, 1 if not NAN

             }

             VFADDPQD(q1, q1, q0);

@@ -2311,7 +2308,19 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             v0 = fpu_get_scratch(dyn, ninst);

             VUZP1Q_64(v0, q0, q1);

             VUZP2Q_64(q0, q0, q1);

+            if(!BOX64ENV(dynarec_fastnan)) {

+                v1 = fpu_get_scratch(dyn, ninst);

+                // check if any input value was NAN

+                VFMAXQD(v1, v0, q0);    // propagate NAN

+                VFCMEQQD(v1, v1, v1);    // 0 if NAN, 1 if not NAN

+            }

             VFSUBQD(q0, v0, q0);

+            if(!BOX64ENV(dynarec_fastnan)) {

+                VFCMEQQD(v0, q0, q0);    // 0 => out is NAN

+                VBICQ(v1, v1, v0);      // forget it if any input was a NAN already

+                VSHLQ_64(v1, v1, 63);   // only keep the sign bit

+                VORRQ(q0, q0, v1);      // NAN -> -NAN

+            }

             break;

         case 0x7E:

             INST_NAME("MOVD Ed,Gx");

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index f4424246..efffe8dd 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -1138,10 +1138,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                 if(!BOX64ENV(dynarec_fastnan)) {
                     // check if any input value was NAN
-                    // but need to mix low/high part
-                    VTRNQ1_64(q0, v2, v1);
-                    VTRNQ2_64(q1, v2, v1);
-                    VFMAXQD(q0, q0, q1);    // propagate NAN
+                    VFMAXPQD(q0, v2, v1);    // propagate NAN
                     VFCMEQQD(q0, q0, q0);    // 0 if NAN, 1 if not NAN
                 }
                 VFADDPQD(v0, v2, v1);
@@ -1154,7 +1151,32 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             if(!vex.l) YMM0(gd);
             break;
-
+        case 0x7D:
+            INST_NAME("VHSUBPD Gx, Vx, Ex");
+            nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            if(!BOX64ENV(dynarec_fastnan))
+                q1 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
+                VUZP1Q_64(q0, v2, v1);
+                VUZP2Q_64(v0, v2, v1);
+                if(!BOX64ENV(dynarec_fastnan)) {
+                    // check if any input value was NAN
+                    // (low/high parts were already split by the UZP1/UZP2 above)
+                    VFMAXQD(q1, v0, q0);    // propagate NAN
+                    VFCMEQQD(q1, q1, q1);    // 0 if NAN, 1 if not NAN
+                }
+                VFSUBQD(v0, q0, v0);
+                if(!BOX64ENV(dynarec_fastnan)) {
+                    VFCMEQQD(q0, v0, v0);    // 0 => out is NAN
+                    VBICQ(q1, q1, q0);      // forget it if any input was a NAN already
+                    VSHLQ_64(q1, q1, 63);   // only keep the sign bit
+                    VORRQ(v0, v0, q1);      // NAN -> -NAN
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         case 0x7E:
             INST_NAME("VMOVD Ed,Gx");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index 156d9243..a94574a8 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -429,10 +429,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                 if(!BOX64ENV(dynarec_fastnan)) {
                     // check if any input value was NAN
-                    // but need to mix low/high part
-                    VUZP1Q_32(q0, v2, v1);
-                    VUZP2Q_32(q1, v2, v1);
-                    VFMAXQS(q0, q0, q1);    // propagate NAN
+                    VFMAXPQS(q0, v2, v1);    // propagate NAN
                     VFCMEQQS(q0, q0, q0);    // 0 if NAN, 1 if not NAN
                 }
                 VFADDPQS(v0, v2, v1);
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 03240803..d329f560 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -427,10 +427,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v0 = fpu_get_scratch(dyn, ninst);

                 v1 = fpu_get_scratch(dyn, ninst);

                 // check if any input value was NAN

-                // but need to mix low/high part

-                VUZP1Q_32(v0, q0, q1);

-                VUZP2Q_32(v1, q0, q1);

-                VFMAXQS(v0, v0, v1);    // propagate NAN

+                VFMAXPQS(v0, q1, q0);    // propagate NAN

                 VFCMEQQS(v0, v0, v0);    // 0 if NAN, 1 if not NAN

             }

             VFADDPQS(q1, q1, q0);

@@ -452,8 +449,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(!BOX64ENV(dynarec_fastnan)) {

                 d1 = fpu_get_scratch(dyn, ninst);

                 // check if any input value was NAN

-                // but need to mix low/high part

-                VFMAXQS(d1, v0, d0);    // propagate NAN

+                VFMAXQS(d1, d0, v0);    // propagate NAN

                 VFCMEQQS(d1, d1, d1);    // 0 if NAN, 1 if not NAN

             }

             VFSUBQS(v0, d0, v0);