diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-02-07 12:13:13 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-02-07 12:13:21 +0100 |
| commit | 46ad2261e760b3bb96c41205ab22dbfa8c54e963 (patch) | |
| tree | ab609bfa6de42626fbfa11b99a1189aa80e56a7a /src | |
| parent | 6c60ad4695c167ac0a9f75b4b139eb46f5abaab9 (diff) | |
| download | box64-46ad2261e760b3bb96c41205ab22dbfa8c54e963.tar.gz box64-46ad2261e760b3bb96c41205ab22dbfa8c54e963.zip | |
[ARM64_DYNAREC] Optimized REP MOVSB
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 12 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 4 | ||||
| -rw-r--r-- | src/libtools/signals.c | 41 |
3 files changed, 57 insertions, 0 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 899915ba..162a833d 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1675,6 +1675,17 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("REP MOVSB"); CBZx_NEXT(xRCX); TBNZ_MARK2(xFlags, F_DF); + IF_UNALIGNED(ip) {} else { + // special optim for large RCX value on forward case only + MARK3; + CMPSx_U12(xRCX, 8); + B_MARK(cCC); + LDRx_S9_postindex(x1, xRSI, 8); + STRx_S9_postindex(x1, xRDI, 8); + SUBx_U12(xRCX, xRCX, 8); + CBNZx_MARK3(xRCX); + CBZx_MARKLOCK(xRCX); + } MARK; // Part with DF==0 LDRB_S9_postindex(x1, xRSI, 1); STRB_S9_postindex(x1, xRDI, 1); @@ -1686,6 +1697,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STRB_S9_postindex(x1, xRDI, -1); SUBx_U12(xRCX, xRCX, 1); CBNZx_MARK2(xRCX); + MARKLOCK; // done } else { INST_NAME("MOVSB"); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 56b388d4..b6398a2b 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -899,6 +899,10 @@ #define CBNZx_MARKLOCK(reg) \ j64 = GETMARKLOCK-(dyn->native_size); \ CBNZx(reg, j64) +// Branch to MARKLOCK if reg is 0 (use j64) +#define CBZx_MARKLOCK(reg) \ + j64 = GETMARKLOCK-(dyn->native_size); \ + CBZx(reg, j64) #ifndef IFNATIVE #define IFNATIVE(A) if(dyn->insts[ninst].need_nat_flags&(A)) diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 10dbdb74..e87b1c34 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -991,6 +991,47 @@ int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd, p->uc_mcontext.pc+=4; // go to next opcode return 1; } + if((opcode&0b10111111111000000000110000000000) == 0b10111000010000000000010000000000) { + // this is a LDR postoffset + int size = 1<<((opcode>>30)&3); + int val = opcode&31; + int dest = (opcode>>5)&31; + int64_t offset = (opcode>>12)&0b111111111; + if((offset>>(9-1))&1) + offset |= (0xffffffffffffffffll<<9); + volatile uint8_t* addr = (void*)(p->uc_mcontext.regs[dest]); + uint64_t value = 0; + if(size==8 && (((uintptr_t)addr)&3)==0) { + for(int i=0; i<2; ++i) + value |= ((uint64_t)((volatile uint32_t*)addr)[i]) << (i*32); + } else + for(int i=0; i<size; ++i) + value |= ((uint64_t)addr[i]) << (i*8); + p->uc_mcontext.regs[val] = value; + p->uc_mcontext.regs[dest] += offset; + p->uc_mcontext.pc+=4; // go to next opcode + return 1; + } + if((opcode&0b10111111111000000000110000000000) == 0b10111000000000000000010000000000) { + // this is a STR postoffset + int size = 1<<((opcode>>30)&3); + int val = opcode&31; + int src = (opcode>>5)&31; + int64_t offset = (opcode>>12)&0b111111111; + if((offset>>(9-1))&1) + offset |= (0xffffffffffffffffll<<9); + volatile uint8_t* addr = (void*)(p->uc_mcontext.regs[src]); + uint64_t value = p->uc_mcontext.regs[val]; + if(size==8 && (((uintptr_t)addr)&3)==0) { + for(int i=0; i<2; ++i) + ((volatile uint32_t*)addr)[i] = (value>>(i*32))&0xffffffff; + } else + for(int i=0; i<size; ++i) + addr[i] = (value>>(i*8))&0xff; + p->uc_mcontext.regs[src] += offset; + p->uc_mcontext.pc+=4; // go to next opcode + return 1; + } #elif RV64 #define GET_FIELD(v, high, low) (((v) >> low) & ((1ULL << (high - low + 1)) - 1)) #define SIGN_EXT(val, val_sz) (((int32_t)(val) << (32 - (val_sz))) >> (32 - (val_sz))) |