about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn>, 2025-02-08 18:30:02 +0800
committer: GitHub <noreply@github.com>, 2025-02-08 11:30:02 +0100
commit: 28ebc1a0b779d6fca2f0b7e45291df6795be6c0a (patch)
tree: 6d8f761b709da657c6c038c684609acd6ee9644e /src
parent: 65487dac516d666e26e4ac1fd7dfeffa8a01ee6a (diff)
download: box64-28ebc1a0b779d6fca2f0b7e45291df6795be6c0a.tar.gz
          box64-28ebc1a0b779d6fca2f0b7e45291df6795be6c0a.zip
[RV64_DYNAREC] Added aligned optim case for REP MOVSB (#2327)
* [RV64_DYNAREC] Added aligned optim case for REP MOVSB

* fixed a typo
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_2.c    | 14
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_66.c      | 14
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h  |  9
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass0.h   |  3
4 files changed, 39 insertions, 1 deletion
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 29898876..75d03ad7 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -662,6 +662,19 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 CBZ_NEXT(xRCX);
                 ANDI(x1, xFlags, 1 << F_DF);
                 BNEZ_MARK2(x1);
+                IF_ALIGNED (ip) {
+                    // special optim for large RCX value on forward case only
+                    MARK3;
+                    ADDI(x1, xZR, 8);
+                    BLT_MARK(xRCX, x1);
+                    LD(x1, xRSI, 0);
+                    SD(x1, xRDI, 0);
+                    ADDI(xRSI, xRSI, 8);
+                    ADDI(xRDI, xRDI, 8);
+                    SUBI(xRCX, xRCX, 8);
+                    BNEZ_MARK3(xRCX);
+                    BEQZ_MARKLOCK(xRCX);
+                }
                 MARK; // Part with DF==0
                 LBU(x1, xRSI, 0);
                 SB(x1, xRDI, 0);
@@ -677,6 +690,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SUBI(xRDI, xRDI, 1);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK2(xRCX);
+                MARKLOCK;
                 // done
             } else {
                 INST_NAME("MOVSB");
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index f5bd1794..eadb624e 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -827,6 +827,19 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 CBZ_NEXT(xRCX);
                 ANDI(x1, xFlags, 1 << F_DF);
                 BNEZ_MARK2(x1);
+                IF_ALIGNED (ip) {
+                    // special optim for large RCX value on forward case only
+                    MARK3;
+                    ADDI(x1, xZR, 8);
+                    BLT_MARK(xRCX, x1);
+                    LD(x1, xRSI, 0);
+                    SD(x1, xRDI, 0);
+                    ADDI(xRSI, xRSI, 8);
+                    ADDI(xRDI, xRDI, 8);
+                    SUBI(xRCX, xRCX, 8);
+                    BNEZ_MARK3(xRCX);
+                    BEQZ_MARKLOCK(xRCX);
+                }
                 MARK; // Part with DF==0
                 LBU(x1, xRSI, 0);
                 SB(x1, xRDI, 0);
@@ -842,6 +855,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SUBI(xRDI, xRDI, 1);
                 SUBI(xRCX, xRCX, 1);
                 BNEZ_MARK2(xRCX);
+                MARKLOCK;
                 // done
             } else {
                 INST_NAME("MOVSB");
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 909a2054..26820441 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -774,6 +774,11 @@
 #define BNE_MARKLOCK2(reg1, reg2) Bxx_gen(NE, MARKLOCK2, reg1, reg2)
 // Branch to MARKLOCK2 if reg1!=0 (use j64)
 #define BNEZ_MARKLOCK2(reg) BNE_MARKLOCK2(reg, xZR)
+// Branch to MARKLOCK if reg1==reg2 (use j64)
+#define BEQ_MARKLOCK(reg1, reg2) Bxx_gen(EQ, MARKLOCK, reg1, reg2)
+// Branch to MARKLOCK if reg1==0 (use j64)
+#define BEQZ_MARKLOCK(reg) BEQ_MARKLOCK(reg, xZR)
+
 
 // Branch to NEXT if reg1==reg2 (use j64)
 #define BEQ_NEXT(reg1, reg2)                                                  \
@@ -818,6 +823,10 @@
 #define IF_UNALIGNED(A)    if(is_addr_unaligned(A))
 #endif
 
+#ifndef IF_ALIGNED
+#define IF_ALIGNED(A) if (!is_addr_unaligned(A))
+#endif
+
 #define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LOAD_REG(A)  LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index 4a94b387..beb9a7c4 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -95,4 +95,5 @@
     } while (0)
 
 // mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
-#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1)))
\ No newline at end of file
+#define IF_UNALIGNED(A) if ((dyn->insts[ninst].unaligned = (is_addr_unaligned(A) ? 0 : 1)))
+#define IF_ALIGNED(A)   if ((dyn->insts[ninst].unaligned = (is_addr_unaligned(A) ? 1 : 0)))
\ No newline at end of file