about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-04-25 20:03:58 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2025-04-25 20:03:58 +0200
commit e4da025dc00257b2b6ad1f5d97df7960e80bcf0e (patch)
tree db2e6883e2dbfad5a667de8aecfbb7ca53e0ebe2 /src
parent b6b069cf8d854467e8fa3d2ebf7f60e975988e6a (diff)
download box64-e4da025dc00257b2b6ad1f5d97df7960e80bcf0e.tar.gz
box64-e4da025dc00257b2b6ad1f5d97df7960e80bcf0e.zip
[ARM64_DYNAREC] Fixed some (rarely used) edge cases for the (V)PMULHRSW opcode (and improved tests)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 4
-rw-r--r-- src/dynarec/arm64/arm64_printer.c 42
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 5
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 9
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c 11
5 files changed, 67 insertions, 4 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 91e4fba6..7cc424fb 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -2166,6 +2166,10 @@ int convert_bitmask(uint64_t bitmask);
 #define URHADDQ_16(Vd, Vn, Vm)      EMIT(RHADD_vector(1, 1, 0b01, Vm, Vn, Vd))
 #define URHADDQ_32(Vd, Vn, Vm)      EMIT(RHADD_vector(1, 1, 0b10, Vm, Vn, Vd))
 
+//SRSHR/URSHR
+#define RSHR(Q, U, immh, immb, Rn, Rd)      ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 1<<13 | 0<<12 | 1<<10 | (Rn)<<5 | (Rd))
+#define SRSHRQ_32(Vd, Vn, shift)    EMIT(RSHR(1, 0, 0b0100 | (((32-(shift))>>3)&0b11), (32-(shift))&0b111, Vn, Vd))
+
 // QRDMULH Signed saturating (Rounding) Doubling Multiply returning High half
 #define QDMULH_vector(Q, U, size, Rm, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10110<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define SQRDMULH_8(Vd, Vn, Vm)      EMIT(QDMULH_vector(0, 1, 0b00, Vm, Vn, Vd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 7d730bc9..3d369fb3 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1587,6 +1587,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%cRHADD V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', Rd, Vd, Rn, Vd, Rm, Vd);

         return buff;

     }

+    //S/URSHR

+    if(isMask(opcode, "0QU011110iiiiiii001001nnnnnddddd", &a)) {

+        int shft = 0;

+        int sz = 0;

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};

+        if(imm&0b1000000) {

+            sz = 3;

+            shft = imm&0b111111;

+        } else if(imm&0b100000) {

+            sz = 2;

+            shft = imm&0b1111;

+        } else if(imm&0b10000) {

+            sz = 1;

+            shft = imm&0b111;

+        } else if(imm&0b1000) {

+            sz = 0;

+            shft = imm&0b111;

+        }

+        const char* Vd = Y[(sz<<1) | a.Q];

+        snprintf(buff, sizeof(buff), "%cRSHR V%d.%s, V%d.%s, #%d", a.U?'U':'S', Rd, Vd, Rn, Vd, shft);

+        return buff;

+    }

     //SQ(R)DMULH

     if(isMask(opcode, "0QU01110ff1mmmmm101101nnnnnddddd", &a)) {

         const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"};

@@ -1762,6 +1784,26 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "PMULL%s V%d.%s, V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);   

         return buff;

     }

+    // [S/U]MULL

+    if(isMask(opcode, "0QU01110ff1mmmmm110000nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};

+        const char* Z[] = {"8H", "4S", "2D", "??"};

+        int sz = sf;

+        const char* Vn = Y[(sz<<1)|a.Q];

+        const char* Vd = Z[sz];

+        snprintf(buff, sizeof(buff), "%cMULL%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.Q?"2":"", Rd, Vd, Rn, Vn, Rm, Vn);   

+        return buff;

+    }

+    //XTN(2)

+    if(isMask(opcode, "0Q001110ff100001001010nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "??"};

+        const char* Z[] = {"8H", "4S", "2D", "??"};

+        int sz = sf;

+        const char* Vd = Y[(sz<<1)|a.Q];

+        const char* Vn = Z[sz];

+        snprintf(buff, sizeof(buff), "XTN%s V%d.%s, V%d.%s", a.Q?"2":"", Rd, Vd, Rn, Vn);   

+        return buff;

+    }

 

     // DMB

     if(isMask(opcode, "11010101000000110011nnnn10111111", &a)) {

diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 9e62417c..b4778849 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -677,7 +677,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     nextop = F8;

                     GETGM(q0);

                     GETEM(q1, 0);

-                    SQRDMULH_16(q0, q0, q1);

+                    v0 = fpu_get_scratch(dyn, ninst);

+                    VSMULL_16(v0, q0, q1);

+                    SRSHRQ_32(v0, v0, 15);

+                    XTN_16(q0, v0);

                     break;

                 case 0x1C:

                     INST_NAME("PABSB Gm,Em");

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 50100d12..fddd347a 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -454,7 +454,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     nextop = F8;

                     GETGX(q0, 1);

                     GETEX(q1, 0, 0);

-                    SQRDMULHQ_16(q0, q0, q1);

+                    v0 = fpu_get_scratch(dyn, ninst);

+                    v1 = fpu_get_scratch(dyn, ninst);

+                    VSMULL_16(v0, q0, q1);

+                    VSMULL2_16(v1, q0, q1);

+                    SRSHRQ_32(v0, v0, 15);

+                    SRSHRQ_32(v1, v1, 15);

+                    XTN_16(q0, v0);

+                    XTN2_16(q0, v1);

                     break;

 

                 case 0x10:

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 5338c53a..91eefba3 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -219,10 +219,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         case 0x0B:
             INST_NAME("VPMULHRSW Gx,Vx, Ex");
             nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            q1 = fpu_get_scratch(dyn, ninst);
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                SQRDMULHQ_16(v0, v2, v1);
-            }
+                VSMULL_16(q0, v1, v2);
+                VSMULL2_16(q1, v1, v2);
+                SRSHRQ_32(q0, q0, 15);
+                SRSHRQ_32(q1, q1, 15);
+                XTN_16(v0, q0);
+                XTN2_16(v0, q1);
+        }
             if(!vex.l) YMM0(gd);
             break;
         case 0x0C: