about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-02-18 16:26:33 +0800
committer GitHub <noreply@github.com> 2025-02-18 09:26:33 +0100
commit 32a747979ad0ca0bba3aa97819daa63d1a5981ee (patch)
tree 256bdab258add70599109c43e3802cb1148920b6 /src
parent 362a92c1589141a8fbbd7f0a555d60638268a2b7 (diff)
download box64-32a747979ad0ca0bba3aa97819daa63d1a5981ee.tar.gz
box64-32a747979ad0ca0bba3aa97819daa63d1a5981ee.zip
[RV64_DYNAREC] Optimized REP MOVSB (#2381)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 36
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c 36
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 2
3 files changed, 46 insertions, 28 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index e77956ff..35241d38 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -657,19 +657,24 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 CBZ_NEXT(xRCX);
                 ANDI(x1, xFlags, 1 << F_DF);
                 BNEZ_MARK2(x1);
-                IF_ALIGNED (ip) {
-                    // special optim for large RCX value on forward case only
-                    MARK3;
-                    ADDI(x1, xZR, 8);
-                    BLT_MARK(xRCX, x1);
-                    LD(x1, xRSI, 0);
-                    SD(x1, xRDI, 0);
-                    ADDI(xRSI, xRSI, 8);
-                    ADDI(xRDI, xRDI, 8);
-                    SUBI(xRCX, xRCX, 8);
-                    BNEZ_MARK3(xRCX);
-                    BEQZ_MARKLOCK(xRCX);
+                if (BOX64DRENV(dynarec_safeflags)) {
+                    // check for overlapping
+                    SUB(x2, xRDI, xRSI);
+                    BLT_MARK(x2, 8);
                 }
+                OR(x1, xRSI, xRDI);
+                ANDI(x1, x1, 7);
+                BNEZ_MARK(x1);
+                ADDI(x6, xZR, 8);
+                MARK3;
+                BLT_MARK(xRCX, x6);
+                LD(x1, xRSI, 0);
+                SD(x1, xRDI, 0);
+                ADDI(xRSI, xRSI, 8);
+                ADDI(xRDI, xRDI, 8);
+                SUBI(xRCX, xRCX, 8);
+                BNEZ_MARK3(xRCX);
+                B_MARKLOCK_nocond;
                 MARK; // Part with DF==0
                 LBU(x1, xRSI, 0);
                 SB(x1, xRDI, 0);
@@ -677,7 +682,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ADDI(xRDI, xRDI, 1);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK(xRCX);
-                B_NEXT_nocond;
+                B_MARKLOCK_nocond;
                 MARK2; // Part with DF==1
                 LBU(x1, xRSI, 0);
                 SB(x1, xRDI, 0);
@@ -695,6 +700,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ADD(xRSI, xRSI, x3);
                 ADD(xRDI, xRDI, x3);
             }
+            SMWRITE();
             break;
         case 0xA5:
             if (rep) {
@@ -709,7 +715,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ADDI(xRDI, xRDI, rex.w ? 8 : 4);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK(xRCX);
-                B_NEXT_nocond;
+                B_MARKLOCK_nocond;
                 MARK2; // Part with DF==1
                 LDxw(x1, xRSI, 0);
                 SDxw(x1, xRDI, 0);
@@ -717,6 +723,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SUBI(xRDI, xRDI, rex.w ? 8 : 4);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK2(xRCX);
+                MARKLOCK;
                 // done
             } else {
                 INST_NAME("MOVSD");
@@ -726,6 +733,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ADD(xRSI, xRSI, x3);
                 ADD(xRDI, xRDI, x3);
             }
+            SMWRITE();
             break;
         case 0xA6:
             switch (rep) {
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 1830054c..9535ce8d 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -827,19 +827,24 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 CBZ_NEXT(xRCX);
                 ANDI(x1, xFlags, 1 << F_DF);
                 BNEZ_MARK2(x1);
-                IF_ALIGNED (ip) {
-                    // special optim for large RCX value on forward case only
-                    MARK3;
-                    ADDI(x1, xZR, 8);
-                    BLT_MARK(xRCX, x1);
-                    LD(x1, xRSI, 0);
-                    SD(x1, xRDI, 0);
-                    ADDI(xRSI, xRSI, 8);
-                    ADDI(xRDI, xRDI, 8);
-                    SUBI(xRCX, xRCX, 8);
-                    BNEZ_MARK3(xRCX);
-                    BEQZ_MARKLOCK(xRCX);
+                if (BOX64DRENV(dynarec_safeflags)) {
+                    // check for overlapping
+                    SUB(x2, xRDI, xRSI);
+                    BLT_MARK(x2, 8);
                 }
+                OR(x1, xRSI, xRDI);
+                ANDI(x1, x1, 7);
+                BNEZ_MARK(x1);
+                ADDI(x6, xZR, 8);
+                MARK3;
+                BLT_MARK(xRCX, x6);
+                LD(x1, xRSI, 0);
+                SD(x1, xRDI, 0);
+                ADDI(xRSI, xRSI, 8);
+                ADDI(xRDI, xRDI, 8);
+                SUBI(xRCX, xRCX, 8);
+                BNEZ_MARK3(xRCX);
+                B_MARKLOCK_nocond;
                 MARK; // Part with DF==0
                 LBU(x1, xRSI, 0);
                 SB(x1, xRDI, 0);
@@ -847,7 +852,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ADDI(xRDI, xRDI, 1);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK(xRCX);
-                B_NEXT_nocond;
+                B_MARKLOCK_nocond;
                 MARK2; // Part with DF==1
                 LBU(x1, xRSI, 0);
                 SB(x1, xRDI, 0);
@@ -865,6 +870,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ADD(xRSI, xRSI, x3);
                 ADD(xRDI, xRDI, x3);
             }
+            SMWRITE();
             break;
         case 0xA5:
             if (rep) {
@@ -879,7 +885,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ADDI(xRDI, xRDI, 2);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK(xRCX);
-                B_NEXT_nocond;
+                B_MARKLOCK_nocond;
                 MARK2; // Part with DF==1
                 LH(x1, xRSI, 0);
                 SH(x1, xRDI, 0);
@@ -887,6 +893,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SUBI(xRDI, xRDI, 2);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK2(xRCX);
+                MARKLOCK;
                 // done
             } else {
                 INST_NAME("MOVSW");
@@ -902,6 +909,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ADD(xRSI, xRSI, x3);
                 ADD(xRDI, xRDI, x3);
             }
+            SMWRITE();
             break;
         case 0xA7:
             switch (rep) {
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index d1ecd7f7..21f07db5 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -779,6 +779,8 @@
 // Branch to MARKLOCK if reg1==0 (use j64)
 #define BEQZ_MARKLOCK(reg) BEQ_MARKLOCK(reg, xZR)
 
+// Branch to MARKLOCK unconditionally (use j64)
+#define B_MARKLOCK_nocond Bxx_gen(__, MARKLOCK, 0, 0)
 
 // Branch to NEXT if reg1==reg2 (use j64)
 #define BEQ_NEXT(reg1, reg2)                                                  \