about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-11-16 16:30:21 +0100
committerptitSeb <sebastien.chev@gmail.com>2023-11-16 16:30:21 +0100
commita8f902885dd513e9f9f3e0c7542d73fe6c7be3a7 (patch)
tree9ee0ab9a532bee8f406e0be34c3febfbc2b40b0e /src
parentc17e37b6e0c4d686702aa9abfdcc58b227626a2b (diff)
downloadbox64-a8f902885dd513e9f9f3e0c7542d73fe6c7be3a7.tar.gz
box64-a8f902885dd513e9f9f3e0c7542d73fe6c7be3a7.zip
[ARM64_DYNAREC] Optimized 66 0F A5 opcode
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_660f.c24
-rw-r--r--src/dynarec/arm64/dynarec_arm64_emit_shift.c56
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h2
3 files changed, 69 insertions, 13 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 689f7143..7e9d73a6 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2168,13 +2168,19 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0xA5:

             nextop = F8;

             INST_NAME("SHLD Ew, Gw, CL");

-            UXTBw(x3, xRCX);

-            MESSAGE(LOG_DUMP, "Need Optimization\n");

-            SETFLAGS(X_ALL, SF_SET);

-            GETEWW(x4, x1, 0);

+            SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined

+            if(box64_dynarec_safeflags>1)

+                MAYSETFLAGS();

             GETGW(x2);

-            CALL_(shld16, x1, wback);

-            EWBACKW(x1);

+            GETEW(x1, 0);

+            UFLAG_IF {

+                ANDSw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f

+                B_NEXT(cEQ);

+            } else {

+                ANDw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f

+            }

+            emit_shld16(dyn, ninst, ed, gd, x4, x5, x6);

+            EWBACK;

             break;

 

         case 0xAB:

@@ -2224,12 +2230,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGW(x2);

             GETEW(x1, 0);

             UFLAG_IF {

-                ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f

+                ANDSw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f

                 B_NEXT(cEQ);

             } else {

-                ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f

+                ANDw_mask(x4, xRCX, 0, 0b00100);  //mask=0x00000001f

             }

-            emit_shrd16(dyn, ninst, ed, gd, x3, x5, x4);

+            emit_shrd16(dyn, ninst, ed, gd, x4, x5, x6);

             EWBACK;

             break;

 

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index cb944f8f..cc583c45 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -1320,16 +1320,16 @@ void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
         }
         return;
     }
-    ORRw_REG_LSL(s1, s2, s1, 16);   // create concat first
+    ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first
     IFX(X_CF) {
-        LSRw(s3, s1, 32-c);
+        LSRw(s3, s1, 16-c);
         BFIw(xFlags, s3, F_CF, 1);
     }
     IFX(X_OF) {
-        LSRw(s3, s1, 31);
+        LSRw(s3, s1, 15);
         BFIw(xFlags, s3, F_OF, 1);  // store current sign for later use
     }
-    RORw(s1, s1, c+16);
+    RORw(s1, s1, 32-c);
 
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -1355,4 +1355,52 @@ void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
+}
+
+void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, int s4)   // emit ARM64 code for a 16-bit SHLD with a register count: s1 = value being shifted (result ends up in its low 16 bits), s2 = value supplying the fill bits, s5 = shift count, s3/s4 = scratch registers
+{
+    IFX(X_PEND) {   // pending-flags path: record operands before s1 is modified
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));   // halfword store of the original s1 into emu->op1
+        STRH_U12(s5, xEmu, offsetof(x64emu_t, op2));   // halfword store of the count into emu->op2
+        SET_DF(s4, d_shld16);   // tag the flag state as d_shld16, using s4 as scratch
+    } else IFX(X_ALL) {   // some flag is wanted but not X_PEND
+        SET_DFNONE(s4);   // macro defined elsewhere; presumably clears the deferred-flag marker - TODO confirm
+    }
+    ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first: s1 |= s2 << 16 (s2 in bits 31..16, s1 in bits 15..0; assumes the upper half of s1 is clear on entry - TODO confirm)
+    IFX(X_CF) {   // carry = bit (16 - count) of the concat, i.e. the last bit shifted out of the low half for counts 1..16
+        MOV32w(s3, 16);
+        SUBw_REG(s3, s3, s5);   // s3 = 16 - count
+        LSRw_REG(s3, s1, s3);   // bring that bit down to bit 0
+        BFIw(xFlags, s3, F_CF, 1);   // insert it at F_CF in xFlags
+    }
+    IFX(X_OF) {
+        LSRw(s3, s1, 15);   // bit 15 of the low half (its sign before the shift) lands in bit 0
+        BFIw(xFlags, s3, F_OF, 1);  // store current sign for later use
+    }
+    MOV32w(s3, 32);
+    SUBw_REG(s3, s3, s5);   // s3 = 32 - count
+    RORw_REG(s1, s1, s3);   // rotate right by (32 - count), i.e. rotate left by count; the 16-bit result is now in bits 15..0 of s1
+
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));   // halfword store of the result into emu->res
+    }
+    IFX(X_ZF) {
+        TSTw_mask(s1, 0, 15);   // 0xffff: test only the low 16 bits of the result
+        CSETw(s4, cEQ);   // s4 = 1 when those bits are all zero
+        BFIw(xFlags, s4, F_ZF, 1);   // insert at F_ZF
+    }
+    IFX(X_SF) {
+        LSRw(s4, s1, 15);   // bit 15 of the result moved to bit 0
+        BFIw(xFlags, s4, F_SF, 1);   // only bit 0 of s4 is inserted at F_SF
+    }
+    IFX(X_OF) {   // the OF bit is recomputed only when count == 1; otherwise the sign saved above is left in place
+        CMPSw_U12(s5, 1);
+        Bcond(cNE, 4+3*4);   // count != 1: branch over the next three instructions
+            LSRw(s3, s1, 15);   // sign of the result in bit 0
+            EORw_REG_LSR(s3, s3, xFlags, F_OF);  // OF is set if sign changed (xor with the sign saved at F_OF)
+            BFIw(xFlags, s3, F_OF, 1);   // write the final OF bit
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);   // parity computed from the result in s1, with s3/s4 as scratch
+    }
 }
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 2ce35ee2..c8320b57 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1048,6 +1048,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shrd16c    STEPNAME(emit_shrd16c)
 #define emit_shrd16     STEPNAME(emit_shrd16)
 #define emit_shld16c    STEPNAME(emit_shld16c)
+#define emit_shld16     STEPNAME(emit_shld16)
 
 #define emit_pf         STEPNAME(emit_pf)
 
@@ -1198,6 +1199,7 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, int s4);
 void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, int s4);
 
 void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);