diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-02-10 18:44:35 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-10 11:44:35 +0100 |
| commit | f5174d12aa0f9aef7fce3ef6263df0f08d2e8c75 (patch) | |
| tree | b9dca6c3e3a2380255cedab4262c2f0376f12c92 /src | |
| parent | 2cb25c095a113e575bf5e4e9fe051c283daaf659 (diff) | |
| download | box64-f5174d12aa0f9aef7fce3ef6263df0f08d2e8c75.tar.gz box64-f5174d12aa0f9aef7fce3ef6263df0f08d2e8c75.zip | |
[LA64_DYNAREC] Added preliminary optimization for REP MOVSB (#2340)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 14 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 14 |
2 files changed, 28 insertions, 0 deletions
```diff
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 1751c759..a342fb53 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -1291,6 +1291,20 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     CBZ_NEXT(xRCX);
     ANDI(x1, xFlags, 1 << F_DF);
     BNEZ_MARK2(x1);
+    // special optim for large RCX value on forward case only
+    OR(x1, xRSI, xRDI);
+    ANDI(x1, x1, 7);
+    BNEZ_MARK(x1);
+    ADDI_D(x6, xZR, 8);
+    MARK3;
+    BLT_MARK(xRCX, x6);
+    LD_D(x1, xRSI, 0);
+    ST_D(x1, xRDI, 0);
+    ADDI_D(xRSI, xRSI, 8);
+    ADDI_D(xRDI, xRDI, 8);
+    ADDI_D(xRCX, xRCX, -8);
+    BNEZ_MARK3(xRCX);
+    B_NEXT_nocond;
     MARK; // Part with DF==0
     LD_BU(x1, xRSI, 0);
     ST_B(x1, xRDI, 0);
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 8d3ae2e5..5e9e0a3b 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -542,6 +542,20 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     CBZ_NEXT(xRCX);
     ANDI(x1, xFlags, 1 << F_DF);
     BNEZ_MARK2(x1);
+    // special optim for large RCX value on forward case only
+    OR(x1, xRSI, xRDI);
+    ANDI(x1, x1, 7);
+    BNEZ_MARK(x1);
+    ADDI_D(x6, xZR, 8);
+    MARK3;
+    BLT_MARK(xRCX, x6);
+    LD_D(x1, xRSI, 0);
+    ST_D(x1, xRDI, 0);
+    ADDI_D(xRSI, xRSI, 8);
+    ADDI_D(xRDI, xRDI, 8);
+    ADDI_D(xRCX, xRCX, -8);
+    BNEZ_MARK3(xRCX);
+    B_NEXT_nocond;
     MARK; // Part with DF==0
     LD_BU(x1, xRSI, 0);
     ST_B(x1, xRDI, 0);
```