about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-04-06 19:55:34 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-04-06 19:55:34 +0200
commit 053ecec70bda076cfd4910a850bfbd8971fd7501 (patch)
tree d045a1e6b1438c1a621901c7e7be3aa07278d814
parent 42601d845132aef7be9d1a0c246f75ca47f7618b (diff)
download box64-053ecec70bda076cfd4910a850bfbd8971fd7501.tar.gz
box64-053ecec70bda076cfd4910a850bfbd8971fd7501.zip
[ARM64_DYNAREC] Fixes to 66 0F E1/E2 opcodes
-rwxr-xr-x src/dynarec/arm64/arm64_emitter.h 18
-rwxr-xr-x src/dynarec/arm64/arm64_printer.c 44
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_660f.c 28
3 files changed, 62 insertions, 28 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index c84e4f86..0d6998ba 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1695,6 +1695,8 @@
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd)   ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8)           EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8)            EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
+#define MOVI_16(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000, ((imm8)&0b11111), Rd))
+#define MOVI_32(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
 
 // SHLL and eXtend Long
 #define SHLL_vector(Q, U, immh, immb, Rn, Rd)  ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b10100<<11 | 1<<10 | (Rn)<<5 | (Rd))
@@ -1764,35 +1766,35 @@
 #define SMAX_8(Vd, Vn, Vm)          EMIT(MINMAX_vector(0, 0, 0b00, Vm, 0, Vn, Vd))
 #define SMAX_16(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b01, Vm, 0, Vn, Vd))
 #define SMAX_32(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b10, Vm, 0, Vn, Vd))
-#define SMAX_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b11, Vm, 0, Vn, Vd))
+//#define SMAX_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b11, Vm, 0, Vn, Vd))
 #define UMAX_8(Vd, Vn, Vm)          EMIT(MINMAX_vector(0, 1, 0b00, Vm, 0, Vn, Vd))
 #define UMAX_16(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b01, Vm, 0, Vn, Vd))
 #define UMAX_32(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b10, Vm, 0, Vn, Vd))
-#define UMAX_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b11, Vm, 0, Vn, Vd))
+//#define UMAX_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b11, Vm, 0, Vn, Vd))
 #define SMIN_8(Vd, Vn, Vm)          EMIT(MINMAX_vector(0, 0, 0b00, Vm, 1, Vn, Vd))
 #define SMIN_16(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b01, Vm, 1, Vn, Vd))
 #define SMIN_32(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b10, Vm, 1, Vn, Vd))
-#define SMIN_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b11, Vm, 1, Vn, Vd))
+//#define SMIN_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 0, 0b11, Vm, 1, Vn, Vd))
 #define UMIN_8(Vd, Vn, Vm)          EMIT(MINMAX_vector(0, 1, 0b00, Vm, 1, Vn, Vd))
 #define UMIN_16(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b01, Vm, 1, Vn, Vd))
 #define UMIN_32(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b10, Vm, 1, Vn, Vd))
-#define UMIN_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b11, Vm, 1, Vn, Vd))
+//#define UMIN_64(Vd, Vn, Vm)         EMIT(MINMAX_vector(0, 1, 0b11, Vm, 1, Vn, Vd))
 #define SMAXQ_8(Vd, Vn, Vm)         EMIT(MINMAX_vector(1, 0, 0b00, Vm, 0, Vn, Vd))
 #define SMAXQ_16(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b01, Vm, 0, Vn, Vd))
 #define SMAXQ_32(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b10, Vm, 0, Vn, Vd))
-#define SMAXQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b11, Vm, 0, Vn, Vd))
+//#define SMAXQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b11, Vm, 0, Vn, Vd))
 #define UMAXQ_8(Vd, Vn, Vm)         EMIT(MINMAX_vector(1, 1, 0b00, Vm, 0, Vn, Vd))
 #define UMAXQ_16(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b01, Vm, 0, Vn, Vd))
 #define UMAXQ_32(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b10, Vm, 0, Vn, Vd))
-#define UMAXQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b11, Vm, 0, Vn, Vd))
+//#define UMAXQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b11, Vm, 0, Vn, Vd))
 #define SMINQ_8(Vd, Vn, Vm)         EMIT(MINMAX_vector(1, 0, 0b00, Vm, 1, Vn, Vd))
 #define SMINQ_16(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b01, Vm, 1, Vn, Vd))
 #define SMINQ_32(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b10, Vm, 1, Vn, Vd))
-#define SMINQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b11, Vm, 1, Vn, Vd))
+//#define SMINQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 0, 0b11, Vm, 1, Vn, Vd))
 #define UMINQ_8(Vd, Vn, Vm)         EMIT(MINMAX_vector(1, 1, 0b00, Vm, 1, Vn, Vd))
 #define UMINQ_16(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b01, Vm, 1, Vn, Vd))
 #define UMINQ_32(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b10, Vm, 1, Vn, Vd))
-#define UMINQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b11, Vm, 1, Vn, Vd))
+//#define UMINQ_64(Vd, Vn, Vm)        EMIT(MINMAX_vector(1, 1, 0b11, Vm, 1, Vn, Vd))
 
 // HADD vector
 #define HADD_vector(Q, U, size, Rm, Rn, Rd)     ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 1<<10 | (Rn)<<5 | (Rd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 2c499ea4..9fe7535d 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -950,6 +950,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "VCMEQ V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd);

         return buff;

     }

+    // MIN/MAX

+    if(isMask(opcode, "0QU01110ff1mmmmm0110o1nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};

+        const char* Vd = Y[((sf)<<1) | a.Q];

+        snprintf(buff, sizeof(buff), "%c%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.o?"MIN":"MAX", Rd, Vd, Rn, Vd, Rm, Vd);

+        return buff;

+    }

+

+    // MOV immediate (not)shifted 8bits

+    if(isMask(opcode, "0Q00111100000iii111001iiiiiddddd", &a)) {

+        const char* Y[] = {"8B", "16B"};

+        const char* Vd = Y[a.Q];

+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);

+        return buff;

+    }

+    // MOV immediate (not)shifted 16bits & 32bits

+    if(isMask(opcode, "0Q00111100000iiif00001iiiiiddddd", &a)) {

+        const char* Y[] = {"2S", "4S", "4H", "8H"};

+        const char* Vd = Y[(sf<<1)| a.Q];

+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);

+        return buff;

+    }

 

     // Shift

     if(isMask(opcode, "0QU011110hhhhrrr000001nnnnnddddd", &a)) {

@@ -1125,6 +1147,20 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "F%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);

         return buff;

     }

+    // NEG

+    if(isMask(opcode, "0Q101110ff100000101110nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};

+        const char* Vd = Y[(sf<<1) | a.Q];

+        snprintf(buff, sizeof(buff), "NEG%s V%d.%s, V%d.%s", a.Q?"Q":"", Rd, Vd, Rn, Vd);

+        return buff;

+    }

+    // SSHL vector

+    if(isMask(opcode, "0QU01110ff1mmmmm010rS1nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};

+        const char* Vd = Y[(sf<<1) | a.Q];

+        snprintf(buff, sizeof(buff), "%c%s%sSHL%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.r?"R":"", a.S?"Q":"", a.Q?"Q":"", Rd, Vd, Rn, Vd, Rm, Vd);

+        return buff;

+    }

 

     // FCVT

     if(isMask(opcode, "f0011110pp10010U000000nnnnnddddd", &a)) {

@@ -1300,14 +1336,6 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;

     }

     

-    // MOV immediate

-    if(isMask(opcode, "0Q00111100000iii111001iiiiiddddd", &a)) {

-        const char* Y[] = {"8B", "16B"};

-        const char* Vd = Y[a.Q];

-        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);

-        return buff;

-    }

-

     // LD1/ST1 single structure

     if(isMask(opcode, "0Q0011010L000000cc0Sffnnnnnttttt", &a)) {

         int scale = a.c;

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 038e70c3..b06a9af2 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2136,13 +2136,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(q0, 1);

             GETEX(q1, 0, 0);

             v0 = fpu_get_scratch(dyn);

-            VMOVeD(v0, 0, q1, 0);

-            VMOVeD(v0, 1, q1, 0);

-            SQXTN_32(v0, v0);   // 2*q1 in 32bits now

-            NEG_32(v0, v0);   // because we want SHR and not SHL

-            VMOVeD(v0, 1, v0, 0);

-            SQXTN_16(v0, v0);   // 4*q1 in 32bits now

-            VMOVeD(v0, 1, v0, 0);

+            v1 = fpu_get_scratch(dyn);

+            SQXTN_32(v0, q1);

+            NEG_32(v0, v0);

+            MOVI_32(v1, 15);

+            SMIN_32(v0, v0, v1);

+            NEG_32(v1, v1);

+            SMAX_32(v0, v0, v1);    // limit to -15 .. +15 values

+            VDUPQ_16(v0, v0, 0);    // only the low 8bits will be used anyway

             SSHLQ_16(q0, q0, v0);

             break;

         case 0xE2:

@@ -2151,11 +2152,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(q0, 1);

             GETEX(q1, 0, 0);

             v0 = fpu_get_scratch(dyn);

-            VMOVeD(v0, 0, q1, 0);

-            VMOVeD(v0, 1, q1, 0);

-            SQXTN_32(v0, v0);   // 2*q1 in 32bits now

-            NEG_32(v0, v0);   // because we want SHR and not SHL

-            VMOVeD(v0, 1, v0, 0);

+            v1 = fpu_get_scratch(dyn);

+            SQXTN_32(v0, q1);

+            NEG_32(v0, v0);

+            MOVI_32(v1, 31);

+            SMIN_32(v0, v0, v1);

+            NEG_32(v1, v1);

+            SMAX_32(v0, v0, v1);    // limit to -31 .. +31 values

+            VDUPQ_32(v0, v0, 0);    // only the low 8bits will be used anyway

             SSHLQ_32(q0, q0, v0);

             break;

         case 0xE3: