about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-02-21 15:31:09 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-02-21 15:31:09 +0100
commit e71df7eb670a1d943a41b7c94f3fc3794bc927eb (patch)
tree e8d7ae3654bdc72fbc138dae3552406d72cf3139
parent 9d1e6b9b960c33bb524cabfd53bb1ce1133e5e3b (diff)
download box64-e71df7eb670a1d943a41b7c94f3fc3794bc927eb.tar.gz
box64-e71df7eb670a1d943a41b7c94f3fc3794bc927eb.zip
[ARM64_DYNAREC] Optimized rcl 8bits with constant
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 45
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_emit_shift.c 33
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 2
3 files changed, 55 insertions, 25 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 1e0cd854..a56e2aec 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1844,7 +1844,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(u8) {
                         SETFLAGS(X_CF|((u8==1)?X_OF:0), SF_SUBSET_PENDING);
                         GETEB(x1, 1);
-                        u8 = F8;
+                        u8 = F8&0x1f;
                         emit_rol8c(dyn, ninst, x1, u8, x4, x5);
                         EBBACK;
                     } else {
@@ -1858,7 +1858,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(u8) {
                         SETFLAGS(X_CF|((u8==1)?X_OF:0), SF_SUBSET_PENDING);
                         GETEB(x1, 1);
-                        u8 = F8;
+                        u8 = F8&0x1f;
                         emit_ror8c(dyn, ninst, x1, u8, x4, x5);
                         EBBACK;
                     } else {
@@ -1868,35 +1868,35 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 2:
                     INST_NAME("RCL Eb, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    READFLAGS(X_CF);
                     u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
-                    if(u8==1) {
-                        SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(u8) {
+                        READFLAGS(X_CF);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                        GETEB(x1, 1);
+                        u8 = F8&0x1f;
+                        emit_rcl8c(dyn, ninst, x1, u8, x4, x5);
+                        EBBACK;
                     } else {
-                        SETFLAGS(X_CF, SF_SET);
+                        FAKEED;
+                        F8;
                     }
-                    GETEB(x1, 1);
-                    u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rcl8, ed, x3);
-                    EBBACK;
                     break;
                 case 3:
                     INST_NAME("RCR Eb, Ib");
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     READFLAGS(X_CF);
                     u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
-                    if(u8==1) {
+                    if(u8) {
                         SETFLAGS(X_OF|X_CF, SF_SET);
+                        GETEB(x1, 1);
+                        u8 = F8&0x1f;
+                        MOV32w(x2, u8);
+                        CALL_(rcr8, ed, x3);
+                        EBBACK;
                     } else {
-                        SETFLAGS(X_CF, SF_SET);
+                        FAKEED;
+                        F8;
                     }
-                    GETEB(x1, 1);
-                    u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rcr8, ed, x3);
-                    EBBACK;
                     break;
                 case 4:
                 case 6:
@@ -2317,12 +2317,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 2:
                     INST_NAME("RCL Eb, 1");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    READFLAGS(X_CF);
-                    SETFLAGS(X_OF|X_CF, SF_SET);
-                    MOV32w(x2, 1);
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     GETEB(x1, 0);
-                    CALL_(rcl8, x1, x3);
+                    emit_rcl8c(dyn, ninst, ed, 1, x4, x5);
                     EBBACK;
                     break;
                 case 3:
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index 61e866c7..0456359b 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -976,6 +976,37 @@ void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     }
 }
 
+// emit RCL8 instruction (8-bit rotate left through carry), from s1, constant c, store result in s1 using s3 and s4 as scratch
+void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) { // X_PEND requested: store the count c into x64emu_t.op2 and set DF
+        MOV32w(s3, c);
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_rol8); // NOTE(review): the tag used is d_rol8 (the ROL one) - confirm this is intended for RCL
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(X_OF|X_CF) { // grab the future CF source bit before the operand is modified below
+        if(c%9) { // only emitted when the count is not a multiple of 9 (8 data bits + the carry bit)
+            LSRw_IMM(x2, x1, 8-(c%9)); // bit 0 of x2 = bit (8-c%9) of the operand; NOTE(review): uses x1/x2 directly, not s1/s3 - confirm callers always pass x1 and do not need x2
+        }
+    }
+    BFIw(x1, xFlags, 8, 1); // insert cf at bit 8, making a 9-bit value
+    ORRw_REG_LSL(x1, x1, x1, 9);    // insert x1 again at bit 9, so the right shift below acts as a rotate of the 9-bit value
+    LSRw_IMM(x1, x1, 9-(c%9)); // do the rcl (right shift of the doubled value by 9-c%9)
+    UXTBw(x1, x1); // keep only the low 8 bits as the result
+    IFX(X_OF|X_CF) {
+        BFIw(xFlags, x2, F_CF, 1); // copy bit 0 of x2 into CF; NOTE(review): when c%9==0 the LSRw_IMM above is skipped, so x2 is not written by this function before this copy - confirm
+        IFX(X_OF) {
+            if(c==1) { // OF is only written when the count is exactly 1
+                EORw_REG_LSR(x2, x2, x1, 7); // bit 0 of x2 = new CF xor bit 7 of the result
+                BFIw(xFlags, x2, F_OF, 1);
+            }
+        }
+    }
+}
+
 // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
 {
@@ -1321,7 +1352,7 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
         ORRw_REG_LSL(s4, s2, s1, 16);
         MOV32w(s3, 32);
         SUBw_REG(s3, s3, s5);
-        RORw_REG(s3, s4, s3);
+        LSRw_REG(s3, s4, s3);
         BFIw(xFlags, s3, F_CF, 1);
     }
     IFX(X_OF) {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index f2654aa4..902219c6 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1080,6 +1080,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_ror8c      STEPNAME(emit_ror8c)
 #define emit_rol16c     STEPNAME(emit_rol16c)
 #define emit_ror16c     STEPNAME(emit_ror16c)
+#define emit_rcl8c      STEPNAME(emit_rcl8c)
 #define emit_shrd32c    STEPNAME(emit_shrd32c)
 #define emit_shrd32     STEPNAME(emit_shrd32)
 #define emit_shld32c    STEPNAME(emit_shld32c)
@@ -1230,6 +1231,7 @@ void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);