about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-05-17 14:29:58 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-05-17 14:29:58 +0200
commitd7127ccc3dad1aab55676f31d26d5ec3799ca8f1 (patch)
tree93db7fc23359c54a12a224efeba9f6a4822a779a /src
parent04e960b0dabc7f8bf77d532b57c1156ff10e2735 (diff)
downloadbox64-d7127ccc3dad1aab55676f31d26d5ec3799ca8f1.tar.gz
box64-d7127ccc3dad1aab55676f31d26d5ec3799ca8f1.zip
[ARM64_DYNAREC] Improved stability of RCL/RCR and added 32/64 bits with constant emitter
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_00.c72
-rw-r--r--src/dynarec/arm64/dynarec_arm64_66.c8
-rw-r--r--src/dynarec/arm64/dynarec_arm64_emit_shift.c104
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h4
-rw-r--r--src/emu/x64run_private.c32
-rw-r--r--src/include/regs.h8
6 files changed, 123 insertions, 105 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 54f59b0d..91e0159d 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -2074,27 +2074,43 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 2:
                     INST_NAME("RCL Ed, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    READFLAGS(X_CF);
-                    u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
-                    SETFLAGS(X_OF|X_CF, SF_SET_DF);
-                    GETEDW(x4, x1, 1);
-                    u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
-                    WBACK;
+                    u8 = geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20));
+                    if(u8) {
+                        READFLAGS(X_CF);
+                        SETFLAGS(X_CF|X_OF, SF_SUBSET); // removed PENDING on purpose
+                        GETED(1);
+                        u8 = (F8)&(rex.w?0x3f:0x1f);
+                        emit_rcl32c(dyn, ninst, rex, ed, u8, x3, x4);
+                        WBACK;
+                    } else {
+                        if(MODREG && ! rex.w && !rex.is32bits) {
+                            GETED(1);
+                            MOVw_REG(ed, ed);
+                        } else {
+                            FAKEED;
+                        }
+                        F8;
+                    }
                     break;
                 case 3:
                     INST_NAME("RCR Ed, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    READFLAGS(X_CF);
-                    u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f;
-                    SETFLAGS(X_OF|X_CF, SF_SET_DF);
-                    GETEDW(x4, x1, 1);
-                    u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
-                    WBACK;
+                    u8 = geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20));
+                    if(u8) {
+                        READFLAGS(X_CF);
+                        SETFLAGS(X_CF|X_OF, SF_SUBSET); // removed PENDING on purpose
+                        GETED(1);
+                        u8 = (F8)&(rex.w?0x3f:0x1f);
+                        emit_rcr32c(dyn, ninst, rex, ed, u8, x3, x4);
+                        WBACK;
+                    } else {
+                        if(MODREG && ! rex.w && !rex.is32bits) {
+                            GETED(1);
+                            MOVw_REG(ed, ed);
+                        } else {
+                            FAKEED;
+                        }
+                        F8;
+                    }
                     break;
                 case 4:
                 case 6:
@@ -2435,7 +2451,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 2:
                     INST_NAME("RCL Eb, 1");
                     READFLAGS(X_CF);
-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
                     GETEB(x1, 0);
                     emit_rcl8c(dyn, ninst, ed, 1, x4, x5);
                     EBBACK;
@@ -2443,7 +2459,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 3:
                     INST_NAME("RCR Eb, 1");
                     READFLAGS(X_CF);
-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
                     GETEB(x1, 0);
                     emit_rcr8c(dyn, ninst, ed, 1, x4, x5);
                     EBBACK;
@@ -2491,22 +2507,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 2:
                     INST_NAME("RCL Ed, 1");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                     READFLAGS(X_CF);
-                    SETFLAGS(X_OF|X_CF, SF_SET_DF);
-                    MOV32w(x2, 1);
-                    GETEDW(x4, x1, 0);
-                    CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
+                    GETED(0);
+                    emit_rcl32c(dyn, ninst, rex, ed, 1, x3, x4);
                     WBACK;
                     break;
                 case 3:
                     INST_NAME("RCR Ed, 1");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                     READFLAGS(X_CF);
-                    SETFLAGS(X_OF|X_CF, SF_SET_DF);
-                    MOV32w(x2, 1);
-                    GETEDW(x4, x1, 0);
-                    CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
+                    SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose
+                    GETED(0);
+                    emit_rcr32c(dyn, ninst, rex, ed, 1, x3, x4);
                     WBACK;
                     break;
                 case 4:
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index d06665a2..df5bcb32 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -976,7 +976,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     INST_NAME("RCL Ew, Ib");

                     if (geted_ib(dyn, addr, ninst, nextop) & 31) {

                         READFLAGS(X_CF);

-                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);

+                        SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                         GETEW(x1, 1);

                         u8 = F8;

                         emit_rcl16c(dyn, ninst, ed, u8, x4, x5);

@@ -990,7 +990,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     INST_NAME("RCR Ew, Ib");

                     if (geted_ib(dyn, addr, ninst, nextop) & 31) {

                         READFLAGS(X_CF);

-                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);

+                        SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                         GETEW(x1, 1);

                         u8 = F8;

                         emit_rcr16c(dyn, ninst, ed, u8, x4, x5);

@@ -1080,7 +1080,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 2:

                     INST_NAME("RCL Ew, 1");

                     READFLAGS(X_CF);

-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);

+                    SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                     GETEW(x1, 0);

                     emit_rcl16c(dyn, ninst, x1, 1, x5, x4);

                     EWBACK;

@@ -1088,7 +1088,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 3:

                     INST_NAME("RCR Ew, 1");

                     READFLAGS(X_CF);

-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);

+                    SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                     GETEW(x1, 0);

                     emit_rcr16c(dyn, ninst, x1, 1, x5, x4);

                     EWBACK;

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index 4b614a94..e1f88393 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -991,14 +991,8 @@ void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 
     if (!(c%9)) return;
 
-    IFX(X_PEND) {
-        MOV32w(s3, c);
-        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
-        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s4, d_rcl8);
-    } else IFX(X_ALL) {
-        SET_DFNONE(s4);
-    }
+    SET_DFNONE(s4);
+
     c%=9;
     BFIw(s1, xFlags, 8, 1); // insert cf
     IFX(X_OF|X_CF) {
@@ -1033,14 +1027,8 @@ void emit_rcr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 
     if (!(c%9)) return;
 
-    IFX(X_PEND) {
-        MOV32w(s3, c);
-        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
-        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s4, d_rcr8);
-    } else IFX(X_ALL) {
-        SET_DFNONE(s4);
-    }
+    SET_DFNONE(s4);
+
     c%=9;
     IFX(X_OF) {
         MOVw_REG(s3, wFlags);
@@ -1071,14 +1059,8 @@ void emit_rcl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 
     if (!(c%17)) return;
 
-    IFX(X_PEND) {
-        MOV32w(s3, c);
-        STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));
-        STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s4, d_rcl16);
-    } else IFX(X_ALL) {
-        SET_DFNONE(s4);
-    }
+    SET_DFNONE(s4);
+
     c%=17;
     BFIw(s1, xFlags, 16, 1); // insert cf
     IFX(X_OF|X_CF) {
@@ -1111,14 +1093,8 @@ void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 
     if (!(c%17)) return;
 
-    IFX(X_PEND) {
-        MOV32w(s3, c);
-        STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));
-        STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s4, d_rcr16);
-    } else IFX(X_ALL) {
-        SET_DFNONE(s4);
-    }
+    SET_DFNONE(s4);
+
     c%=17;
     BFIw(s1, xFlags, 16, 1); // insert cf
     IFX(X_OF) {
@@ -1141,6 +1117,70 @@ void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
 }
+
+// emit RCL32/RCL64 instruction (rotate left through carry; 64-bit when rex.w is set, 32-bit otherwise), from s1, constant c, store result in s1 using s3 and s4 as scratch
+void emit_rcl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) // NOTE(review): c is assumed non-zero and below the operand width (the callers in dynarec_arm64_00.c mask it and skip c==0) - confirm for any new caller
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+
+    SET_DFNONE(s4); // CF/OF are written directly below instead of being left as deferred flags
+
+    IFX(X_OF|X_CF) {
+        LSRxw_IMM(s3, s1, (rex.w?64:32)-c); // bit 0 of s3 = source bit (width-c), the last bit rotated out: the new CF
+    }
+    if(c==1) { // special case: the generic path would need a right shift by the full operand width
+        LSLxw(s1, s1, 1);
+        BFIxw(s1, xFlags, 0, 1); // old CF (bit 0 of xFlags) becomes bit 0 of the result
+    } else {
+        LSLxw(s4, s1, c); // low part of the result: source shifted left by c
+        BFIxw(s4, xFlags, c-1, 1); // old CF lands just below the shifted bits, at bit c-1
+        ORRxw_REG_LSR(s1, s4, s1, (rex.w?65:33)-c); // top c-1 source bits wrap around into bits 0..c-2
+    }
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); // keep the result in emu->res when X_PEND is requested
+    }
+    IFX(X_CF) {
+        BFIw(xFlags, s3, F_CF, 1); // new CF from bit 0 of s3
+    }
+    IFX(X_OF) {
+        if(c==1) { // OF is only produced for a 1-bit rotate; left untouched for other counts
+            EORxw_REG_LSR(s3, s3, s1, rex.w?63:31); // bit 0 = new CF xor MSB of the result
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+// emit RCR32/RCR64 instruction (rotate right through carry; 64-bit when rex.w is set, 32-bit otherwise), from s1, constant c, store result in s1 using s3 and s4 as scratch
+void emit_rcr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) // NOTE(review): c is assumed non-zero and below the operand width (the callers in dynarec_arm64_00.c mask it and skip c==0) - confirm for any new caller
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+
+    SET_DFNONE(s4); // CF/OF are written directly below instead of being left as deferred flags
+
+    IFX(X_OF) {
+        if(c==1) { // OF is only produced for a 1-bit rotate; must be computed before s1 and CF are overwritten
+            EORxw_REG_LSR(s3, xFlags, s1, rex.w?63:31); // bit 0 = old CF xor MSB of the source
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+    IFX(X_CF) {
+        BFXILxw(s3, s1, c-1, 1); // capture source bit c-1 (the last bit rotated out) in bit 0 of s3: the new CF
+    }
+    if(c==1) { // special case: the generic path would need a left shift by the full operand width
+        LSRxw(s1, s1, 1);
+        BFIxw(s1, xFlags, rex.w?63:31, 1); // old CF (bit 0 of xFlags) becomes the MSB of the result
+    } else {
+        LSRxw(s4, s1, c); // high part of the source moved down by c
+        BFIxw(s4, xFlags, (rex.w?64:32)-c, 1); // old CF lands just above the shifted bits, at bit width-c
+        ORRxw_REG_LSL(s1, s4, s1, (rex.w?65:33)-c); // low c-1 source bits wrap around into the top of the result
+    }
+    IFX(X_CF) {
+        BFIw(wFlags, s3, 0, 1); // write the captured bit into bit 0 of the flags (CF), after old CF was consumed above
+    }
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); // keep the result in emu->res when X_PEND is requested
+    }
+}
+
 // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index f19db163..6279d2f1 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1102,6 +1102,8 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_rcr8c      STEPNAME(emit_rcr8c)
 #define emit_rcl16c     STEPNAME(emit_rcl16c)
 #define emit_rcr16c     STEPNAME(emit_rcr16c)
+#define emit_rcl32c     STEPNAME(emit_rcl32c)
+#define emit_rcr32c     STEPNAME(emit_rcr32c)
 #define emit_shrd32c    STEPNAME(emit_shrd32c)
 #define emit_shrd32     STEPNAME(emit_shrd32)
 #define emit_shld32c    STEPNAME(emit_shld32c)
@@ -1259,6 +1261,8 @@ void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 void emit_rcr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_rcl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_rcl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_rcr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4);
diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c
index 016092b3..06f20118 100644
--- a/src/emu/x64run_private.c
+++ b/src/emu/x64run_private.c
@@ -1051,37 +1051,7 @@ void UpdateFlags(x64emu_t *emu)
             }
             CONDITIONAL_SET_FLAG(emu->res.u64 & (1L << 63), F_CF);
             break;
-        case d_rcl8:
-            cnt = emu->op2.u8%9;
-            CONDITIONAL_SET_FLAG(emu->op1.u8>>(9-cnt) & 1, F_CF);
-            // should for cnt==1
-            CONDITIONAL_SET_FLAG(((emu->res.u8>>7) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
-            break;
-        case d_rcr8:
-            cnt = emu->op2.u8%9;
-            // should for cnt==1, using "before" CF
-            CONDITIONAL_SET_FLAG(((emu->res.u8>>7) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
-            // new CF
-            CONDITIONAL_SET_FLAG(((cnt==1)?emu->op1.u8:(emu->op1.u8>>(cnt-1))) & 1, F_CF);
-            break;
-        case d_rcl16:
-            cnt = emu->op2.u16%17;
-            CONDITIONAL_SET_FLAG(emu->op1.u16>>(17-cnt) & 1, F_CF);
-            // should for cnt==1
-            CONDITIONAL_SET_FLAG(((emu->res.u16>>15) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
-            break;
-        case d_rcr16:
-            cnt = emu->op2.u16%17;
-            // should for cnt==1, using "before" CF
-            CONDITIONAL_SET_FLAG(((emu->res.u16>>15) ^ ACCESS_FLAG(F_CF)) & 1, F_OF);
-            // new CF
-            CONDITIONAL_SET_FLAG(((cnt==1)?emu->op1.u16:(emu->op1.u16>>(cnt-1))) & 1, F_CF);
-            break;
-
-        case d_rcl32:
-        case d_rcl64:
-        case d_rcr32:
-        case d_rcr64:
+
         case d_unknown:
             printf_log(LOG_NONE, "Box64: %p trying to evaluate Unknown deferred Flags\n", (void*)R_RIP);
             break;
diff --git a/src/include/regs.h b/src/include/regs.h
index 99ddba0c..3cfcdc5a 100644
--- a/src/include/regs.h
+++ b/src/include/regs.h
@@ -119,14 +119,6 @@ typedef enum {
 	d_ror16,
 	d_ror32,
 	d_ror64,
-	d_rcl8,
-	d_rcl16,
-	d_rcl32,
-	d_rcl64,
-	d_rcr8,
-	d_rcr16,
-	d_rcr32,
-	d_rcr64,
 	d_dec8i,	// interpreter version, to handle the CF flags that is untouched
 	d_dec16i,
 	d_dec32i,