diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-23 12:47:56 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-23 12:47:56 +0200 |
| commit | 815836d28551983e45f3ef167cc8780f90df24a1 (patch) | |
| tree | 8675cb22e9c1146a1931bb41ffabf2f481eface8 /src | |
| parent | 468a3c2165a737029ed01862361d6a62c511501d (diff) | |
| download | box64-815836d28551983e45f3ef167cc8780f90df24a1.tar.gz box64-815836d28551983e45f3ef167cc8780f90df24a1.zip | |
[ARM64_DYNAREC] Optimized REP STOSB
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 34 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 4 |
2 files changed, 36 insertions, 2 deletions
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 059d9fcc..8b06925a 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1893,16 +1893,46 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 INST_NAME("REP STOSB");
                 CBZx_NEXT(xRCX);
                 TBNZ_MARK2(xFlags, F_DF);
+                IF_UNALIGNED(ip) {
+                    MESSAGE(LOG_DEBUG, "\tUnaligned path");
+                    // special optim for large RCX value on forward case only
+                    // but because it's unaligned path, check if a byte per byte is needed, and do 4-bytes per 4-bytes only instead
+                    ANDw_mask(x1, xRDI, 0, 1); //mask = 3
+                    CBNZw_MARK(x1);
+                    UXTBw(x3, xRAX); // prepare x3
+                    ORRw_REG_LSL(x3, x3, x3, 8);
+                    ORRw_REG_LSL(x3, x3, x3, 16); // 4bytes ready
+                    MARK3;
+                    ANDx_mask(x1, xRCX, 1, 0b111110, 0b111101); // mask=0xfffffffffffffffc, so ~3LL
+                    CBZx_MARK(x1); // xRCX<4
+                    STRw_S9_postindex(x3, xRDI, 4);
+                    SUBx_U12(xRCX, xRCX, 4);
+                    CBNZx_MARK3(xRCX);
+                    CBZx_MARKLOCK(xRCX);
+                } else {
+                    // special optim for large RCX value on forward case only
+                    UXTBw(x3, xRAX); // prepare x3
+                    ORRw_REG_LSL(x3, x3, x3, 8);
+                    ORRw_REG_LSL(x3, x3, x3, 16);
+                    ORRx_REG_LSL(x3, x3, x3, 32); // 8 bytes...
+                    MARK3;
+                    ANDx_mask(x1, xRCX, 1, 0b111101, 0b111100); // mask=0xfffffffffffffff8, so ~7LL
+                    CBZx_MARK(x1); // xRCX<8
+                    STRx_S9_postindex(x3, xRDI, 8);
+                    SUBx_U12(xRCX, xRCX, 8);
+                    CBNZx_MARK3(xRCX);
+                    CBZx_MARKLOCK(xRCX);
+                }
                 MARK; // Part with DF==0
                 STRB_S9_postindex(xRAX, xRDI, 1);
                 SUBx_U12(xRCX, xRCX, 1);
                 CBNZx_MARK(xRCX);
-                B_MARK3_nocond;
+                B_MARKLOCK_nocond;
                 MARK2; // Part with DF==1
                 STRB_S9_postindex(xRAX, xRDI, -1);
                 SUBx_U12(xRCX, xRCX, 1);
                 CBNZx_MARK2(xRCX);
-                MARK3;
+                MARKLOCK;
                 // done
             } else {
                 INST_NAME("STOSB");
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index ded76a69..af806d38 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -760,6 +760,10 @@
     j64 = GETMARK-(dyn->native_size); \
     CBZw(reg, j64)
 // Branch to MARK if reg is 0 (use j64)
+#define CBZx_MARK(reg) \
+    j64 = GETMARK-(dyn->native_size); \
+    CBZx(reg, j64)
+// Branch to MARK if reg is 0 (use j64)
 #define CBZxw_MARK(reg) \
     j64 = GETMARK-(dyn->native_size); \
     CBZxw(reg, j64)
```