about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-11-16 13:20:01 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2023-11-16 13:20:01 +0100
commit df25986597a6c24984fd81a5eb14b95754ed6973 (patch)
tree b0b04ab568c12875f8003f44f237f7665dec6639 /src
parent dc06ca51a64fbb251e89726a0a0a6f2bbebf7c8c (diff)
download box64-df25986597a6c24984fd81a5eb14b95754ed6973.tar.gz
box64-df25986597a6c24984fd81a5eb14b95754ed6973.zip
[ARM64_DYNAREC] Optimized 0F A5 opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 26
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_emit_shift.c 47
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 4
3 files changed, 70 insertions, 7 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index ea126cbf..b91128b9 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1300,13 +1300,27 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xA5:

             nextop = F8;

             INST_NAME("SHLD Ed, Gd, CL");

-            MESSAGE(LOG_DUMP, "Need Optimization\n");

-            UXTBw(x3, xRCX);

-            SETFLAGS(X_ALL, SF_SET);

-            GETEDW(x4, x1, 0);

+            SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined

+            if(box64_dynarec_safeflags>1)

+                MAYSETFLAGS();

             GETGD;

-            MOVxw_REG(x2, gd);

-            CALL_(rex.w?((void*)shld64):((void*)shld32), ed, x4);

+            GETED(0);

+            if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}

+            UFLAG_IF {

+                if(rex.w) {

+                    ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f

+                } else {

+                    ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f

+                }

+                B_NEXT(cEQ);

+            } else {

+                if(rex.w) {

+                    ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f

+                } else {

+                    ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f

+                }

+            }

+            emit_shld32(dyn, ninst, rex, ed, gd, x3, x5, x4);

             WBACK;

             break;

 

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index 7c626eb1..6af2c7c9 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -1154,6 +1154,53 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
+void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4)  // emit SHLD32/64 instruction, from s1, fill s2, variable count in s5, store result in s1 using s3 and s4 as scratch
+{
+    IFX(X_PEND) {  // deferred-flags path: record the inputs in the emu state
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));  // op1 = value being shifted, before the shift
+        STRxw_U12(s5, xEmu, offsetof(x64emu_t, op2));  // op2 = shift count
+        SET_DF(s4, rex.w?d_shld64:d_shld32);  // deferred-flag type chosen by operand width; s4 is passed as scratch
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);  // presumably marks that no deferred flags are pending -- confirm against SET_DFNONE
+    }
+    MOV32w(s3, (rex.w?64:32));  // s3 = operand width in bits
+    SUBw_REG(s3, s3, s5);  // s3 = width - count; NOTE(review): this equals the full width when count is 0 -- confirm every caller skips a zero count, including when no flags are requested
+    IFX(X_CF) {
+        LSRxw_REG(s4, s1, s3);  // bit 0 of s4 = bit (width - count) of s1, the last bit shifted out
+        BFIxw(xFlags, s4, F_CF, 1);  // copy that single bit into CF
+    }
+    IFX(X_OF) {
+        LSRxw(s4, s1, rex.w?63:31);  // s4 = sign bit of the original value
+        BFIw(xFlags, s4, F_OF, 1);  // store current sign for later use
+    }
+    LSLxw_REG(s4, s1, s5);  // s4 = s1 << count
+    LSRxw_REG(s3, s2, s3);  // s3 = s2 >> (width - count): bits entering from the fill operand
+    ORRxw_REG(s1, s3, s4);  // s1 = combined result

+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));  // res = result, stored alongside op1/op2
+    }
+    IFX(X_ZF) {
+        TSTxw_REG(s1, s1);  // test the result against itself
+        CSETw(s4, cEQ);  // s4 = 1 when the result is zero
+        BFIw(xFlags, s4, F_ZF, 1);  // ZF = s4
+    }
+    IFX(X_SF) {
+        LSRxw(s4, s1, (rex.w)?63:31);  // s4 = top bit of the result
+        BFIx(xFlags, s4, F_SF, 1);  // SF = s4
+    }
+    IFX(X_OF) {
+        CMPSw_U12(s5, 1);  // OF is only recomputed for a count of 1
+        Bcond(cNE, 4+3*4);  // otherwise branch over the 3 lines below (assumes one 4-byte opcode each); F_OF then keeps the saved sign bit
+            LSRxw(s3, s1, rex.w?63:31);  // s3 = sign bit of the result
+            EORxw_REG_LSR(s3, s3, xFlags, F_OF);  // OF is set if sign changed
+            BFIw(xFlags, s3, F_OF, 1);  // write the computed OF back
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);  // presumably derives PF from the result using s3/s4 as scratch -- see emit_pf
+    }
+}
+
 // emit SHRD16 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index a853e8f0..ef03ff42 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1042,8 +1042,9 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_rol16c     STEPNAME(emit_rol16c)
 #define emit_ror16c     STEPNAME(emit_ror16c)
 #define emit_shrd32c    STEPNAME(emit_shrd32c)
-#define emit_shld32c    STEPNAME(emit_shld32c)
 #define emit_shrd32     STEPNAME(emit_shrd32)
+#define emit_shld32c    STEPNAME(emit_shld32c)
+#define emit_shld32     STEPNAME(emit_shld32)
 #define emit_shrd16c    STEPNAME(emit_shrd16c)
 #define emit_shrd16     STEPNAME(emit_shrd16)
 
@@ -1192,6 +1193,7 @@ void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
+void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
 void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, int s4);