diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-02-18 16:26:33 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-18 09:26:33 +0100 |
| commit | 32a747979ad0ca0bba3aa97819daa63d1a5981ee (patch) | |
| tree | 256bdab258add70599109c43e3802cb1148920b6 /src | |
| parent | 362a92c1589141a8fbbd7f0a555d60638268a2b7 (diff) | |
| download | box64-32a747979ad0ca0bba3aa97819daa63d1a5981ee.tar.gz box64-32a747979ad0ca0bba3aa97819daa63d1a5981ee.zip | |
[RV64_DYNAREC] Optimized REP MOVSB (#2381)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 36 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 36 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
3 files changed, 46 insertions, 28 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index e77956ff..35241d38 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -657,19 +657,24 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int CBZ_NEXT(xRCX); ANDI(x1, xFlags, 1 << F_DF); BNEZ_MARK2(x1); - IF_ALIGNED (ip) { - // special optim for large RCX value on forward case only - MARK3; - ADDI(x1, xZR, 8); - BLT_MARK(xRCX, x1); - LD(x1, xRSI, 0); - SD(x1, xRDI, 0); - ADDI(xRSI, xRSI, 8); - ADDI(xRDI, xRDI, 8); - SUBI(xRCX, xRCX, 8); - BNEZ_MARK3(xRCX); - BEQZ_MARKLOCK(xRCX); + if (BOX64DRENV(dynarec_safeflags)) { + // check for overlapping + SUB(x2, xRDI, xRSI); + BLT_MARK(x2, 8); } + OR(x1, xRSI, xRDI); + ANDI(x1, x1, 7); + BNEZ_MARK(x1); + ADDI(x6, xZR, 8); + MARK3; + BLT_MARK(xRCX, x6); + LD(x1, xRSI, 0); + SD(x1, xRDI, 0); + ADDI(xRSI, xRSI, 8); + ADDI(xRDI, xRDI, 8); + SUBI(xRCX, xRCX, 8); + BNEZ_MARK3(xRCX); + B_MARKLOCK_nocond; MARK; // Part with DF==0 LBU(x1, xRSI, 0); SB(x1, xRDI, 0); @@ -677,7 +682,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADDI(xRDI, xRDI, 1); SUBI(xRCX, xRCX, 1); BNEZ_MARK(xRCX); - B_NEXT_nocond; + B_MARKLOCK_nocond; MARK2; // Part with DF==1 LBU(x1, xRSI, 0); SB(x1, xRDI, 0); @@ -695,6 +700,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADD(xRSI, xRSI, x3); ADD(xRDI, xRDI, x3); } + SMWRITE(); break; case 0xA5: if (rep) { @@ -709,7 +715,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADDI(xRDI, xRDI, rex.w ? 8 : 4); SUBI(xRCX, xRCX, 1); BNEZ_MARK(xRCX); - B_NEXT_nocond; + B_MARKLOCK_nocond; MARK2; // Part with DF==1 LDxw(x1, xRSI, 0); SDxw(x1, xRDI, 0); @@ -717,6 +723,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SUBI(xRDI, xRDI, rex.w ? 
8 : 4); SUBI(xRCX, xRCX, 1); BNEZ_MARK2(xRCX); + MARKLOCK; // done } else { INST_NAME("MOVSD"); @@ -726,6 +733,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADD(xRSI, xRSI, x3); ADD(xRDI, xRDI, x3); } + SMWRITE(); break; case 0xA6: switch (rep) { diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index 1830054c..9535ce8d 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -827,19 +827,24 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni CBZ_NEXT(xRCX); ANDI(x1, xFlags, 1 << F_DF); BNEZ_MARK2(x1); - IF_ALIGNED (ip) { - // special optim for large RCX value on forward case only - MARK3; - ADDI(x1, xZR, 8); - BLT_MARK(xRCX, x1); - LD(x1, xRSI, 0); - SD(x1, xRDI, 0); - ADDI(xRSI, xRSI, 8); - ADDI(xRDI, xRDI, 8); - SUBI(xRCX, xRCX, 8); - BNEZ_MARK3(xRCX); - BEQZ_MARKLOCK(xRCX); + if (BOX64DRENV(dynarec_safeflags)) { + // check for overlapping + SUB(x2, xRDI, xRSI); + BLT_MARK(x2, 8); } + OR(x1, xRSI, xRDI); + ANDI(x1, x1, 7); + BNEZ_MARK(x1); + ADDI(x6, xZR, 8); + MARK3; + BLT_MARK(xRCX, x6); + LD(x1, xRSI, 0); + SD(x1, xRDI, 0); + ADDI(xRSI, xRSI, 8); + ADDI(xRDI, xRDI, 8); + SUBI(xRCX, xRCX, 8); + BNEZ_MARK3(xRCX); + B_MARKLOCK_nocond; MARK; // Part with DF==0 LBU(x1, xRSI, 0); SB(x1, xRDI, 0); @@ -847,7 +852,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(xRDI, xRDI, 1); SUBI(xRCX, xRCX, 1); BNEZ_MARK(xRCX); - B_NEXT_nocond; + B_MARKLOCK_nocond; MARK2; // Part with DF==1 LBU(x1, xRSI, 0); SB(x1, xRDI, 0); @@ -865,6 +870,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADD(xRSI, xRSI, x3); ADD(xRDI, xRDI, x3); } + SMWRITE(); break; case 0xA5: if (rep) { @@ -879,7 +885,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(xRDI, xRDI, 2); SUBI(xRCX, xRCX, 1); BNEZ_MARK(xRCX); - B_NEXT_nocond; + 
B_MARKLOCK_nocond; MARK2; // Part with DF==1 LH(x1, xRSI, 0); SH(x1, xRDI, 0); @@ -887,6 +893,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SUBI(xRDI, xRDI, 2); SUBI(xRCX, xRCX, 1); BNEZ_MARK2(xRCX); + MARKLOCK; // done } else { INST_NAME("MOVSW"); @@ -902,6 +909,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADD(xRSI, xRSI, x3); ADD(xRDI, xRDI, x3); } + SMWRITE(); break; case 0xA7: switch (rep) { diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index d1ecd7f7..21f07db5 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -779,6 +779,8 @@ // Branch to MARKLOCK if reg1==0 (use j64) #define BEQZ_MARKLOCK(reg) BEQ_MARKLOCK(reg, xZR) +// Branch to MARKLOCK instruction unconditionally (use j64) +#define B_MARKLOCK_nocond Bxx_gen(__, MARKLOCK, 0, 0) // Branch to NEXT if reg1==reg2 (use j64) #define BEQ_NEXT(reg1, reg2) \ |