From 65487dac516d666e26e4ac1fd7dfeffa8a01ee6a Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Sat, 8 Feb 2025 10:51:39 +0100
Subject: [ARM64_DYNAREC] Optimized unaligned path for REP MOVSB

---
 src/dynarec/arm64/dynarec_arm64_00.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index fc54be0e..44758153 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1675,7 +1675,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("REP MOVSB");
             CBZx_NEXT(xRCX);
             TBNZ_MARK2(xFlags, F_DF);
-            IF_ALIGNED(ip) {
+            IF_UNALIGNED(ip) {
+                // special optim for large RCX value on forward case only
+                // but because it's the unaligned path, check if a byte-per-byte copy is needed, and do 4 bytes per 4 bytes instead
+                ORRw_REG(x1, xRSI, xRDI);
+                ANDw_mask(x1, x1, 0, 1);    //mask = 3
+                CBNZw_MARK(x1);
+                MARK3;
+                CMPSx_U12(xRCX, 4);
+                B_MARK(cCC);
+                LDRw_S9_postindex(x1, xRSI, 4);
+                STRw_S9_postindex(x1, xRDI, 4);
+                SUBx_U12(xRCX, xRCX, 4);
+                CBNZx_MARK3(xRCX);
+                CBZx_MARKLOCK(xRCX);
+            } else {
                 // special optim for large RCX value on forward case only
                 MARK3;
                 CMPSx_U12(xRCX, 8);
-- 
cgit 1.4.1
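
What the new unaligned path does, as a minimal C sketch: when the instruction has been flagged as performing unaligned accesses, the generated code tests whether RSI and RDI are both at least 4-byte aligned, copies 4 bytes per iteration if so, and falls back to the byte-per-byte loop for misaligned pointers or for the sub-4-byte tail. The function name and the pointer-passing convention below are illustrative only; the real code is JIT-emitted ARM64, not C, and the comments map each step back to the macros in the diff.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical C equivalent of the forward (DF=0) REP MOVSB path
       emitted above. */
    static void rep_movsb_forward(uint8_t **rdi, uint8_t **rsi, uint64_t *rcx)
    {
        uint8_t *dst = *rdi, *src = *rsi;
        uint64_t n = *rcx;

        /* ORRw_REG + ANDw_mask(x1, x1, 0, 1): test (RSI|RDI) & 3;
           CBNZw_MARK jumps straight to the byte loop if any bit is set */
        if ((((uintptr_t)src | (uintptr_t)dst) & 3) == 0) {
            /* MARK3 loop: CMPSx_U12(xRCX, 4) + B_MARK(cCC) exits once
               fewer than 4 bytes remain */
            while (n >= 4) {
                uint32_t w;
                memcpy(&w, src, 4);   /* LDRw_S9_postindex(x1, xRSI, 4) */
                memcpy(dst, &w, 4);   /* STRw_S9_postindex(x1, xRDI, 4) */
                src += 4;
                dst += 4;
                n   -= 4;             /* SUBx_U12(xRCX, xRCX, 4) */
            }
            /* CBZx_MARKLOCK(xRCX): if nothing is left, the byte loop
               below is skipped entirely */
        }

        /* MARK: byte-per-byte loop, for misaligned pointers or the tail */
        while (n--)
            *dst++ = *src++;

        *rdi = dst;
        *rsi = src;
        *rcx = 0;
    }

Note the design choice of testing RSI|RDI with a single OR: both pointers must have their low two bits clear. Mutually misaligned pointers (e.g. RSI%4 == 1, RDI%4 == 0) take the byte loop, since no chunk size would keep both accesses aligned in that case.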