about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-03-23 09:29:50 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2021-03-23 09:29:50 +0100
commit e52c207c751536e7db6b7f553903d225f1783b39 (patch)
tree c39c8e24f49f123f2659e64470f37ef5a88d57de /src
parent bfdcb023b3321d093e5fbe9944e9b818c3b65b20 (diff)
download box64-e52c207c751536e7db6b7f553903d225f1783b39.tar.gz
download box64-e52c207c751536e7db6b7f553903d225f1783b39.zip
[DYNAREC] Optimized F2/F3 AE/A6 opcodes
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64_emitter.h 6
-rwxr-xr-x src/dynarec/dynarec_arm64_00.c 38
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.h 12
3 files changed, 40 insertions, 16 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 28dfa452..1d2add7b 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -198,6 +198,7 @@
 #define LDRw_S9_preindex(Rt, Rn, imm9)    EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
 #define LDRB_S9_postindex(Rt, Rn, imm9)   EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
 #define LDRB_S9_preindex(Rt, Rn, imm9)    EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define LDRxw_S9_postindex(Rt, Rn, imm9)  EMIT(LDR_gen(rex.w?0b11:0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
 
 #define LDRS_gen(size, op1, imm9, op2, Rn, Rt)   ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt))
 #define LDRSW_S9_postindex(Rt, Rn, imm9)  EMIT(LDRS_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
@@ -240,6 +241,7 @@
 #define STRx_S9_preindex(Rt, Rn, imm9)    EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
 #define STRw_S9_postindex(Rt, Rn, imm9)   EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
 #define STRw_S9_preindex(Rt, Rn, imm9)    EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define STRxw_S9_postindex(Rt, Rn, imm9)  EMIT(STR_gen(rex.w?0b11:0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
 
 #define ST_gen(size, op1, imm12, Rn, Rt)        ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
 #define STRx_U12(Rt, Rn, imm12)           EMIT(ST_gen(0b11, 0b01, ((uint32_t)(imm12>>3))&0xfff, Rn, Rt))
@@ -324,8 +326,8 @@
 #define CBZxw(Rt, imm19)                EMIT(CB_gen(rex.w, 0, ((imm19)>>2)&0x7FFFF, Rt))
 
 #define TB_gen(b5, op, b40, imm14, Rt)  ((b5)<<31 | 0b011011<<25 | (op)<<24  | (b40)<<19 | (imm14)<<5 | (Rt))
-#define TBZ(Rt, bit, imm16)             EMIT(TB_gen(((bit)>>5)&1, 0, (bit)&0x1f, ((imm19)>>2)&0x3FFF, Rt))
-#define TBNZ(Rt, bit, imm16)            EMIT(TB_gen(((bit)>>5)&1, 1, (bit)&0x1f, ((imm19)>>2)&0x3FFF, Rt))
+#define TBZ(Rt, bit, imm16)             EMIT(TB_gen(((bit)>>5)&1, 0, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt))
+#define TBNZ(Rt, bit, imm16)            EMIT(TB_gen(((bit)>>5)&1, 1, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt))
 
 #define Bcond_gen(imm19, cond)          (0b0101010<<25 | (imm19)<<5 | (cond))
 #define Bcond(cond, imm19)              EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond))
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index 692c55be..776fb3ae 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -996,16 +996,23 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(rep==1) {INST_NAME("REPNZ CMPSB");} else {INST_NAME("REPZ CMPSB");}
                 SETFLAGS(X_ALL, SF_SET);
                 CBZx_NEXT(xRCX);
-                GETDIR(x3, 1);
-                MARK;
-                LDRB_U12(x1, xRSI, 0);
-                LDRB_U12(x2, xRDI, 0);
-                ADDx_REG(xRSI, xRSI, x3);
-                ADDx_REG(xRDI, xRDI, x3);
+                TBNZ_MARK2(xFlags, F_DF);
+                MARK;   // Part with DF==0
+                LDRB_S9_postindex(x1, xRSI, 1);
+                LDRB_S9_postindex(x2, xRDI, 1);
                 SUBx_U12(xRCX, xRCX, 1);
                 CMPSw_REG(x1, x2);
-                Bcond((rep==1)?cEQ:cNE, 4+4);
+                B_MARK3((rep==1)?cEQ:cNE);
                 CBNZx_MARK(xRCX);
+                B_MARK3_nocond;
+                MARK2;  // Part with DF==1
+                LDRB_S9_postindex(x1, xRSI, -1);
+                LDRB_S9_postindex(x2, xRDI, -1);
+                SUBx_U12(xRCX, xRCX, 1);
+                CMPSw_REG(x1, x2);
+                B_MARK3((rep==1)?cEQ:cNE);
+                CBNZx_MARK2(xRCX);
+                MARK3;  // end
                 emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5);
                 break;
             default:
@@ -1044,15 +1051,22 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(rep==1) {INST_NAME("REPNZ SCASB");} else {INST_NAME("REPZ SCASB");}
                 SETFLAGS(X_ALL, SF_SET);
                 CBZx_NEXT(xRCX);
-                GETDIR(x3, 1);
                 UBFXw(x1, xRAX, 0, 8);
-                MARK;
-                LDRB_U12(x2, xRDI, 0);
-                ADDx_REG(xRDI, xRDI, x3);
+                TBNZ_MARK2(xFlags, F_DF);
+                MARK;   // Part with DF==0
+                LDRB_S9_postindex(x2, xRDI, 1);
                 SUBx_U12(xRCX, xRCX, 1);
                 CMPSw_REG(x1, x2);
-                Bcond((rep==1)?cEQ:cNE, 4+4);
+                B_MARK3((rep==1)?cEQ:cNE);
                 CBNZx_MARK(xRCX);
+                B_MARK3_nocond;
+                MARK2;  // Part with DF==1
+                LDRB_S9_postindex(x2, xRDI, -1);
+                SUBx_U12(xRCX, xRCX, 1);
+                CMPSw_REG(x1, x2);
+                B_MARK3((rep==1)?cEQ:cNE);
+                CBNZx_MARK2(xRCX);
+                MARK3;  // end
                 emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5);
                 break;
             default:
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 94cdd652..ff397379 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -262,8 +262,8 @@
     j32 = GETMARK-(dyn->arm_size);  \
     B(j32)
 // Branch to MARK if reg is not 0 (use j32)
-#define CBNZx_MARK(reg)            \
-    j32 = GETMARK-(dyn->arm_size); \
+#define CBNZx_MARK(reg)             \
+    j32 = GETMARK-(dyn->arm_size);  \
     CBNZx(reg, j32)
 // Branch to MARK2 if cond (use j32)
 #define B_MARK2(cond)               \
@@ -273,6 +273,14 @@
 #define B_MARK2_nocond              \
     j32 = GETMARK2-(dyn->arm_size); \
     B(j32)
+// Branch to MARK2 if reg is not 0 (use j32)
+#define CBNZx_MARK2(reg)            \
+    j32 = GETMARK2-(dyn->arm_size); \
+    CBNZx(reg, j32)
+// Test bit N of A and branch to MARK2 if set
+#define TBNZ_MARK2(A, N)            \
+    j32 = GETMARK2-(dyn->arm_size); \
+    TBNZ(A, N, j32)
 // Branch to MARK3 if cond (use j32)
 #define B_MARK3(cond)               \
     j32 = GETMARK3-(dyn->arm_size); \