diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-02-12 19:24:27 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-02-12 19:24:27 +0100 |
| commit | af0b0c6c9d95e8bb6359de0f42d7623233d1a7f9 (patch) | |
| tree | 545b0c1da19ba96e15345cf27f42eba2372d8879 /src | |
| parent | 0e588e444f2ba5e461a8fc106864b87cb2633320 (diff) | |
| download | box64-af0b0c6c9d95e8bb6359de0f42d7623233d1a7f9.tar.gz box64-af0b0c6c9d95e8bb6359de0f42d7623233d1a7f9.zip | |
[ARM64_DYNAREC] Improved some 66 F0 opcodes, especially the unaligned path
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 1 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66f0.c | 46 |
2 files changed, 30 insertions, 17 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 21060124..f56a68dc 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -262,6 +262,7 @@ int convert_bitmask(uint64_t bitmask); #define SUB_ext(sf, op, S, Rm, option, imm3, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | 1<<21 | (Rm)<<16 | (option)<<13 | (imm3)<<10 | (Rn)<<5 | (Rd)) #define SUBxw_UXTB(Rd, Rn, Rm) EMIT(SUB_ext(rex.w, 1, 0, Rm, 0b000, 0, Rn, Rd)) +#define SUBw_UXTB(Rd, Rn, Rm) EMIT(SUB_ext(0, 1, 0, Rm, 0b000, 0, Rn, Rd)) // CCMP compare if cond is true, set nzcv if false #define CCMP_reg(sf, Rm, cond, Rn, nzcv) ((sf)<<31 | 1<<30 | 1<<29 | 0b11010010<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (nzcv)) diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index 7ee6e9a3..2ee22297 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -158,6 +158,8 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n MARK3; LDRH_U12(x1, wback, 0); LDAXRB(x3, wback); // dummy read, to arm the write... 
+ SUBw_UXTB(x3, x3, x1); + CBNZw_MARK3(x3); CMPSw_REG(x6, x1); B_MARK(cNE); // EAX == Ed @@ -166,8 +168,8 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STRH_U12(gd, wback, 0); } } - SMDMB(); MARK; + SMDMB(); // Common part (and fallback for EAX != Ed) UFLAG_IF {emit_cmp16(dyn, ninst, x6, x1, x3, x4, x5);} BFIx(xRAX, x1, 0, 16); @@ -295,30 +297,36 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } if(arm64_atomics) { - LDADDALH(x5, x1, wback); UFLAG_IF { - emit_add16(dyn, ninst, x1, x5, x3, x4); + LDADDALH(x5, x1, wback); + } else { + STADDLH(x5, wback); } } else { MARKLOCK; LDAXRH(x1, wback); - emit_add16(dyn, ninst, x1, x5, x3, x4); - STLXRH(x3, x1, wback); + ADDw_REG(x4, x1, x5); + STLXRH(x3, x4, wback); CBNZx_MARKLOCK(x3); } SMDMB(); if(!ALIGNED_ATOMICH) { - B_NEXT_nocond; + B_MARK2_nocond; MARK; // unaligned! also, not enough LDRH_U12(x1, wback, 0); LDAXRB(x4, wback); - BFIw(x1, x4, 0, 8); // re-inject - emit_add16(dyn, ninst, x1, x5, x3, x4); - STLXRB(x3, x1, wback); + SUBw_UXTB(x4, x4, x1); + CBNZw_MARK(x4); + ADDw_REG(x4, x1, x5); + STLXRB(x3, x4, wback); CBNZx_MARK(x3); - STRH_U12(x1, wback, 0); // put the whole value + STRH_U12(x4, wback, 0); // put the whole value SMDMB(); } + MARK2; + UFLAG_IF { + emit_add16(dyn, ninst, x1, x5, x3, x4); + } } break; case 1: //OR @@ -472,30 +480,34 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n NEGw_REG(x4, x5); UFLAG_IF { LDADDALH(x4, x1, wback); - emit_sub16(dyn, ninst, x1, x5, x3, x4); } else { STADDLH(x4, wback); } } else { MARKLOCK; LDAXRH(x1, wback); - emit_sub16(dyn, ninst, x1, x5, x3, x4); + SUBw_REG(x4, x1, x5); STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } SMDMB(); if(!ALIGNED_ATOMICH) { - B_NEXT_nocond; + B_MARK2_nocond; MARK; // unaligned! 
also, not enough LDRH_U12(x1, wback, 0); LDAXRB(x4, wback); - BFIw(x1, x4, 0, 8); // re-inject - emit_sub16(dyn, ninst, x1, x5, x3, x4); - STLXRB(x3, x1, wback); + SUBw_UXTB(x4, x4, x1); + CBNZw_MARK(x4); + SUBw_REG(x4, x1, x5); + STLXRB(x3, x4, wback); CBNZx_MARK(x3); - STRH_U12(x1, wback, 0); // put the whole value + STRH_U12(x4, wback, 0); // put the whole value SMDMB(); } + MARK2; + UFLAG_IF { + emit_sub16(dyn, ninst, x1, x5, x3, x4); + } } break; case 6: //XOR |