| author | ptitSeb <sebastien.chev@gmail.com> | 2025-02-12 15:17:01 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-02-12 15:17:01 +0100 |
| commit | f575a7846e24270dc5924bc8af67b5d03c28e231 (patch) | |
| tree | 977aa6d571d8ef8addef8bb8b6c3e3e7b0232e01 /src | |
| parent | c7ff0f658bc326c29dfbf79e9aac22250939e329 (diff) | |
| download | box64-f575a7846e24270dc5924bc8af67b5d03c28e231.tar.gz box64-f575a7846e24270dc5924bc8af67b5d03c28e231.zip | |
[ARM64_DYNAREC] Improved many LOCK prefixed opcodes, especially the unaligned path
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h        |   3 |
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c        |  10 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f0.c     | 298 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h |   4 |
4 files changed, 211 insertions, 104 deletions
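
The change the commit message describes shows up as one recurring pattern in the diff below: on the unaligned path, instead of re-injecting the exclusively loaded byte into the full-width value with BFIxw, the new code compares that byte against the low byte of a plain full-width load (SUBxw_UXTB + CBNZ, retrying on mismatch), computes the result from the full-width value, stores the low byte with STLXRB, then writes the whole result with a plain store; the flags are computed once afterwards at MARK2, and only when needed. What follows is a minimal host-side C model of that strategy for a 32-bit LOCK ADD — an illustration only, not box64 code: the function name is invented, and a single byte-wide compare-and-swap stands in for the LDAXRB / SUBxw_UXTB / CBNZ / STLXRB sequence.

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical model of the unaligned LOCK ADD fallback the dynarec emits.
 * Only the first byte of the operand is updated atomically; once the byte
 * CAS (standing in for LDAXRB / SUBxw_UXTB / CBNZ / STLXRB) succeeds, the
 * rest of the value is written with a plain store, like the
 * LDRxw_U12 ... STRxw_U12 pair in the emitted code. */
static uint32_t unaligned_lock_add32(uint8_t *p, uint32_t gd)
{
    for (;;) {
        uint32_t old;
        memcpy(&old, p, sizeof(old));           /* plain full-width read (LDRxw_U12) */
        uint8_t  expected = (uint8_t)old;       /* low byte seen by that read        */
        uint32_t result   = old + gd;           /* ADDxw_REG(x4, x1, gd)             */
        /* Succeeds only if the monitored byte still matches the full-width read;
         * otherwise loop back, like CBNZw_MARK / CBNZx_MARK in the emitted code. */
        if (__atomic_compare_exchange_n(p, &expected, (uint8_t)result,
                                        0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
            memcpy(p, &result, sizeof(result)); /* STRxw_U12: put the whole value    */
            return old;                         /* old value, kept for the flags pass */
        }
    }
}
```

The full-width store itself is not atomic; the scheme only holds together because every LOCK-prefixed access box64 emits for that location funnels through the same first byte, which is also why the aligned fast path (LDADDAL, or the LDAXR/STLXR loop on the whole operand) is kept separate in every opcode touched below.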
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index ecda33c3..21060124 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -260,6 +260,9 @@ int convert_bitmask(uint64_t bitmask);
 #define SBCSw_REG(Rd, Rn, Rm)   FEMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd))
 #define SBCSxw_REG(Rd, Rn, Rm)  FEMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd))
 
+#define SUB_ext(sf, op, S, Rm, option, imm3, Rn, Rd)    ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | 1<<21 | (Rm)<<16 | (option)<<13 | (imm3)<<10 | (Rn)<<5 | (Rd))
+#define SUBxw_UXTB(Rd, Rn, Rm)  EMIT(SUB_ext(rex.w, 1, 0, Rm, 0b000, 0, Rn, Rd))
+
 // CCMP compare if cond is true, set nzcv if false
 #define CCMP_reg(sf, Rm, cond, Rn, nzcv)    ((sf)<<31 | 1<<30 | 1<<29 | 0b11010010<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (nzcv))
 #define CCMPw(Wn, Wm, nzcv, cond)   FEMIT(CCMP_reg(0, Wm, cond, Wn, nzcv))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 0d450d4a..b8fc2cc3 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -554,6 +554,16 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "SUBS %s, %s, %s %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm);
         return buff;
     }
+    if(isMask(opcode, "f1001011001mmmmmoooiiinnnnnddddd", &a)) {
+        const char* shifts[2][8] = {{ "UXTB", "UXTH", "LSL", "UXTX", "SXTB", "SXTH", "SXTW", "SXTX"}, {"UXTB", "UXTH", "UXTW", "LSL", "SXTB", "SXTH", "SXTW", "SXTX" }};
+        if(((sf==0 && shift==2) || (sf==1 && shift==3)) && imm==0)
+            snprintf(buff, sizeof(buff), "SUB %s, %s, %s", sf?XtSp[Rd]:WtSp[Rd], sf?XtSp[Rn]:WtSp[Rn], sf?XtSp[Rm]:WtSp[Rm]);
+        else if(shift==0)
+            snprintf(buff, sizeof(buff), "SUB %s, %s, %s %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[sf][shift]);
+        else
+            snprintf(buff, sizeof(buff), "SUB %s, %s, %s %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[sf][shift], imm);
+        return buff;
+    }
     // ---- LOGIC
     if(isMask(opcode, "f11100100Nrrrrrrssssssnnnnnddddd", &a)) {
         uint64_t i = DecodeBitMasks(a.N, imms, immr);
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index 19ecc0a4..6ec7fcbd 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -114,7 +114,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(arm64_atomics) {
                 UFLAG_IF {
                     LDADDALxw(gd, x1, wback);
-                    emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
                 } else {
                     STADDLxw(gd, wback);
                 }
@@ -122,23 +121,28 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 MARKLOCK;
                 LDAXRxw(x1, wback);
-                emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
-                STLXRxw(x3, x1, wback);
+                ADDxw_REG(x4, x1, gd);
+                STLXRxw(x3, x4, wback);
                 CBNZx_MARKLOCK(x3);
                 SMDMB();
             }
             if(!ALIGNED_ATOMICxw) {
-                B_NEXT_nocond;
+                B_MARK2_nocond;
                 MARK;   // unaligned! also, not enough
                 LDRxw_U12(x1, wback, 0);
                 LDAXRB(x4, wback);
-                BFIxw(x1, x4, 0, 8);    // re-inject
-                emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
-                STLXRB(x3, x1, wback);
+                SUBxw_UXTB(x4, x4, x1); // substract with the byte only
+                CBNZw_MARK(x4);         // jump if different
+                ADDxw_REG(x4, x1, gd);
+                STLXRB(x3, x4, wback);
                 CBNZx_MARK(x3);
-                STRxw_U12(x1, wback, 0);    // put the whole value
+                STRxw_U12(x4, wback, 0);    // put the whole value
                 SMDMB();
             }
+            MARK2;
+            UFLAG_IF {
+                emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
+            }
         }
         break;
@@ -385,6 +389,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             MARK3;
             LDRxw_U12(x1, wback, 0);
             LDAXRB(x3, wback);  // dummy read, to arm the write...
+            SUBxw_UXTB(x3, x3, x1);
+            CBNZw_MARK3(x3);
             CMPSxw_REG(xRAX, x1);
             B_MARK(cNE);
             // EAX == Ed
@@ -709,16 +715,9 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            }
            if(arm64_atomics) {
                UFLAG_IF {
-                   MOVxw_REG(x3, gd);
-                   LDADDALxw(x3, gd, wback);
-                   SMDMB();
-                   emit_add32(dyn, ninst, rex, x3, gd, x4, x5);
+                   LDADDALxw(gd, x1, wback);
                } else {
                    LDADDALxw(gd, gd, wback);
-                   SMDMB();
-               }
-               if(!ALIGNED_ATOMICxw) {
-                   B_NEXT_nocond;
                }
            } else {
                MARKLOCK;
@@ -726,28 +725,31 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                ADDxw_REG(x4, x1, gd);
                STLXRxw(x3, x4, wback);
                CBNZx_MARKLOCK(x3);
-               SMDMB();
-               if(!ALIGNED_ATOMICxw) {
-                   B_MARK2_nocond;
-               }
            }
+           SMDMB();
            if(!ALIGNED_ATOMICxw) {
+               UFLAG_IF {
+                   B_MARK2_nocond;
+               } else {
+                   B_NEXT_nocond;
+               }
                MARK;
                LDRxw_U12(x1, wback, 0);
                LDAXRB(x4, wback);
-               BFIxw(x1, x4, 0, 8);
+               SUBxw_UXTB(x4, x4, x1);
+               CBNZw_MARK(x4);
                ADDxw_REG(x4, x1, gd);
                STLXRB(x3, x4, wback);
                CBNZx_MARK(x3);
                STRxw_U12(x4, wback, 0);
                SMDMB();
            }
-           if(!ALIGNED_ATOMICxw || !arm64_atomics) {
-               MARK2;
-               IFX(X_ALL|X_PEND) {
-                   MOVxw_REG(x2, x1);
-                   emit_add32(dyn, ninst, rex, x2, gd, x3, x4);
-               }
+           MARK2;
+           UFLAG_IF {
+               MOVxw_REG(x3, x1);
+               emit_add32(dyn, ninst, rex, x3, gd, x4, x5);
+               MOVxw_REG(gd, x1);
+           } else if(!arm64_atomics || !ALIGNED_ATOMICxw) {
               MOVxw_REG(gd, x1);
            }
        }
@@ -828,6 +830,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            MARK2;
            LDPxw_S7_offset(x2, x3, wback, 0);
            LDAXRB(x5, wback);
+           SUBxw_UXTB(x5, x5, x2);
+           CBNZw_MARK2(x5);
            CMPSxw_REG(xRAX, x2);
            CCMPxw(xRDX, x3, 0, cEQ);
            B_MARKSEG(cNE); // EAX!=ED[0] || EDX!=Ed[1]
@@ -989,34 +993,42 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    B_MARK(cNE);
                }
            }
-           if(arm64_atomics) {
+           if(arm64_atomics && 0) {    // disabled because 0x80000000 has no negative
                NEGxw_REG(x1, gd);
                UFLAG_IF {
                    LDADDALxw(x1, x1, wback);
-                   emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
                } else {
                    STADDLxw(x1, wback);
                }
            } else {
                MARKLOCK;
                LDAXRxw(x1, wback);
-               emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
-               STLXRxw(x3, x1, wback);
+               SUBxw_REG(x4, x1, gd);
+               STLXRxw(x3, x4, wback);
                CBNZx_MARKLOCK(x3);
-               SMDMB();
            }
+           SMDMB();
            if(!ALIGNED_ATOMICxw) {
-               B_NEXT_nocond;
+               UFLAG_IF {
+                   B_MARK2_nocond;
+               } else {
+                   B_NEXT_nocond;
+               }
                MARK;   // unaligned! also, not enough
                LDRxw_U12(x1, wback, 0);
                LDAXRB(x4, wback);
-               BFIxw(x1, x4, 0, 8);    // re-inject
-               emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
-               STLXRB(x3, x1, wback);
+               SUBxw_UXTB(x4, x4, x1);
+               CBNZw_MARK(x4);
+               SUBxw_REG(x4, x1, gd);
+               STLXRB(x3, x4, wback);
                CBNZx_MARK(x3);
-               STRxw_U12(x1, wback, 0);    // put the whole value
+               STRxw_U12(x4, wback, 0);    // put the whole value
                SMDMB();
            }
+           UFLAG_IF {
+               MARK2;
+               emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
+           }
        }
        break;
@@ -1043,31 +1055,34 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(arm64_atomics) {
                UFLAG_IF {
                    LDEORALxw(gd, x1, wback);
-                   emit_xor32(dyn, ninst, rex, x1, gd, x3, x4);
                } else {
                    STEORLxw(gd, wback);
                }
-               SMDMB();
            } else {
                MARKLOCK;
                LDAXRxw(x1, wback);
-               emit_xor32(dyn, ninst, rex, x1, gd, x3, x4);
-               STLXRxw(x3, x1, wback);
+               EORxw_REG(x4, x1, gd);
+               STLXRxw(x3, x4, wback);
                CBNZx_MARKLOCK(x3);
-               SMDMB();
            }
+           SMDMB();
            if(!ALIGNED_ATOMICxw) {
-               B_NEXT_nocond;
+               B_MARK2_nocond;
                MARK;   // unaligned! also, not enough
                LDRxw_U12(x1, wback, 0);
                LDAXRB(x4, wback);
-               BFIxw(x1, x4, 0, 8);    // re-inject
-               emit_xor32(dyn, ninst, rex, x1, gd, x3, x4);
-               STLXRB(x3, x1, wback);
+               SUBxw_UXTB(x4, x4, x1);
+               CBNZw_MARK(x4);
+               EORxw_REG(x4, x1, gd);
+               STLXRB(x3, x4, wback);
                CBNZx_MARK(x3);
-               STRxw_U12(x1, wback, 0);    // put the whole value
+               STRxw_U12(x4, wback, 0);    // put the whole value
                SMDMB();
            }
+           MARK2;
+           UFLAG_IF {
+               emit_xor32(dyn, ninst, rex, x1, gd, x3, x4);
+           }
        }
        break;
@@ -1312,6 +1327,9 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                } else {
                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                   if((i64<=-0x1000) || (i64>=0x1000)) {
+                       MOV64xw(x5, i64);
+                   }
                    if(!ALIGNED_ATOMICxw) {
                        if(arm64_uscat) {
                            ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
@@ -1323,35 +1341,56 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                        }
                    }
                    if(arm64_atomics) {
-                       MOV64xw(x3, i64);
+                       if((i64>-0x1000) && (i64<0x1000)) {
+                           MOV64xw(x5, i64);
+                       }
                        UFLAG_IF {
-                           LDADDALxw(x3, x1, wback);
-                           SMDMB();
-                           emit_add32(dyn, ninst, rex, x1, x3, x4, x5);
+                           LDADDALxw(x5, x1, wback);
                        } else {
-                           STADDLxw(x3, wback);
-                           SMDMB();
+                           STADDLxw(x5, wback);
                        }
+                       SMDMB();
                    } else {
                        MARKLOCK;
                        LDAXRxw(x1, wback);
-                       emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                       STLXRxw(x3, x1, wback);
+                       if(i64>=0 && i64<0x1000) {
+                           ADDxw_U12(x4, x1, i64);
+                       } else if(i64<0 && i64>-0x1000) {
+                           SUBxw_U12(x4, x1, -i64);
+                       } else {
+                           ADDxw_REG(x4, x1, x5);
+                       }
+                       STLXRxw(x3, x4, wback);
                        CBNZx_MARKLOCK(x3);
                        SMDMB();
                    }
                    if(!ALIGNED_ATOMICxw) {
-                       B_NEXT_nocond;
+                       B_MARK2_nocond;
                        MARK;   // unaligned! also, not enough
                        LDRxw_U12(x1, wback, 0);
                        LDAXRB(x4, wback);
-                       BFIxw(x1, x4, 0, 8);    // re-inject
-                       emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                       STLXRB(x3, x1, wback);
+                       SUBxw_UXTB(x4, x4, x1);
+                       CBNZw_MARK(x4);
+                       if(i64>=0 && i64<0x1000) {
+                           ADDxw_U12(x4, x1, i64);
+                       } else if(i64<0 && i64>-0x1000) {
+                           SUBxw_U12(x4, x1, -i64);
+                       } else {
+                           ADDxw_REG(x4, x1, x5);
+                       }
+                       STLXRB(x3, x4, wback);
                        CBNZx_MARK(x3);
-                       STRxw_U12(x1, wback, 0);    // put the whole value
+                       STRxw_U12(x4, wback, 0);    // put the whole value
                        SMDMB();
                    }
+                   MARK2;
+                   UFLAG_IF {
+                       if((i64<=-0x1000) || (i64>=0x1000)) {
+                           emit_add32(dyn, ninst, rex, x1, x5, x3, x4);
+                       } else {
+                           emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
+                       }
+                   }
                }
                break;
            case 1: //OR
@@ -1360,13 +1399,12 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                if(MODREG) {
                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
                    ed = TO_NAT((nextop & 7) + (rex.b << 3));
-                   MOV64xw(x5, i64);
-                   emit_or32(dyn, ninst, rex, ed, x5, x3, x4);
+                   emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
                } else {
                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                   MOV64xw(x5, i64);
                    if(arm64_atomics) {
+                       MOV64xw(x5, i64);
                        UFLAG_IF {
                            LDSETALxw(x5, x1, wback);
                            emit_or32(dyn, ninst, rex, x1, x5, x3, x4);
@@ -1376,7 +1414,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    } else {
                        MARKLOCK;
                        LDAXRxw(x1, wback);
-                       emit_or32(dyn, ninst, rex, x1, x5, x3, x4);
+                       emit_or32c(dyn, ninst, rex, x1, i64, x3, x4);
                        STLXRxw(x3, x1, wback);
                        CBNZx_MARKLOCK(x3);
                    }
@@ -1446,10 +1484,9 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                            STCLRLxw(x5, wback);
                        }
                    } else {
-                       MOV64xw(x5, i64);
                        MARKLOCK;
                        LDAXRxw(x1, wback);
-                       emit_and32(dyn, ninst, rex, x1, x5, x3, x4);
+                       emit_and32c(dyn, ninst, rex, x1, i64, x3, x4);
                        STLXRxw(x3, x1, wback);
                        CBNZx_MARKLOCK(x3);
                    }
@@ -1462,11 +1499,13 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                if(MODREG) {
                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
                    ed = TO_NAT((nextop & 7) + (rex.b << 3));
-                   MOV64xw(x5, i64);
-                   emit_sub32(dyn, ninst, rex, ed, x5, x3, x4);
+                   emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5);
                } else {
                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                   if((i64<=-0x1000) || (i64>=0x1000)) {
+                       MOV64xw(x5, i64);
+                   }
                    if(!ALIGNED_ATOMICxw) {
                        if(arm64_uscat) {
                            ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
@@ -1478,36 +1517,64 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                        }
                    }
                    if(arm64_atomics) {
-                       MOV64xw(x5, -i64);
+                       if((i64>-0x1000) && (i64<0x1000)) {
+                           MOV64xw(x5, -i64);
+                       } else {
+                           NEGxw_REG(x5, x5);
+                       }
                        UFLAG_IF {
                            LDADDALxw(x5, x1, wback);
-                           SMDMB();
-                           NEGxw_REG(x5, x5);
-                           emit_sub32(dyn, ninst, rex, x1, x5, x3, x4);
+                           if((i64<=-0x1000) || (i64>=0x1000))
+                               NEGxw_REG(x5, x5);
                        } else {
                            STADDLxw(x5, wback);
-                           SMDMB();
                        }
+                       SMDMB();
                    } else {
                        MARKLOCK;
                        LDAXRxw(x1, wback);
-                       emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                       STLXRxw(x3, x1, wback);
+                       if(i64>=0 && i64<0x1000) {
+                           SUBxw_U12(x4, x1, i64);
+                       } else if(i64<0 && i64>-0x1000) {
+                           ADDxw_U12(x4, x1, -i64);
+                       } else {
+                           SUBxw_REG(x4, x1, x5);
+                       }
+                       STLXRxw(x3, x4, wback);
                        CBNZx_MARKLOCK(x3);
                        SMDMB();
                    }
                    if(!ALIGNED_ATOMICxw) {
-                       B_NEXT_nocond;
+                       UFLAG_IF {
+                           B_MARK2_nocond;
+                       } else {
+                           B_NEXT_nocond;
+                       }
                        MARK;   // unaligned! also, not enough
                        LDRxw_U12(x1, wback, 0);
                        LDAXRB(x4, wback);
-                       BFIxw(x1, x4, 0, 8);    // re-inject
-                       emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                       STLXRB(x3, x1, wback);
+                       SUBxw_UXTB(x4, x4, x1);
+                       CBNZw_MARK(x4);
+                       if(i64>=0 && i64<0x1000) {
+                           SUBxw_U12(x4, x1, i64);
+                       } else if(i64<0 && i64>-0x1000) {
+                           ADDxw_U12(x4, x1, -i64);
+                       } else {
+                           SUBxw_REG(x4, x1, x5);
+                       }
+                       STLXRB(x3, x4, wback);
                        CBNZx_MARK(x3);
-                       STRxw_U12(x1, wback, 0);    // put the whole value
+                       STRxw_U12(x4, wback, 0);    // put the whole value
                        SMDMB();
                    }
+                   UFLAG_IF {
+                       MARK2;
+                       if((i64<=-0x1000) || (i64>=0x1000)) {
+                           emit_sub32(dyn, ninst, rex, x1, x5, x3, x4);
+                       } else {
+                           emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
+                       }
+                   }
                }
                break;
            case 6: //XOR
@@ -1521,8 +1588,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                } else {
                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                   MOV64xw(x5, i64);
                    if(arm64_atomics) {
+                       MOV64xw(x5, i64);
                        UFLAG_IF {
                            LDEORALxw(x5, x1, wback);
                            emit_xor32(dyn, ninst, rex, x1, x5, x3, x4);
@@ -1532,7 +1599,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    } else {
                        MARKLOCK;
                        LDAXRxw(x1, wback);
-                       emit_xor32(dyn, ninst, rex, x1, x5, x3, x4);
+                       emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4);
                        STLXRxw(x3, x1, wback);
                        CBNZx_MARKLOCK(x3);
                    }
@@ -1630,7 +1697,9 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!ALIGNED_ATOMICxw) {
                MARK;
                LDRxw_U12(x1, ed, 0);
-               LDAXRB(x3, ed);
+               LDAXRB(x4, ed);
+               SUBxw_UXTB(x4, x4, x1);
+               CBNZw_MARK(x4);
                STLXRB(x3, gd, ed);
                CBNZx_MARK(x3);
                STRxw_U12(gd, ed, 0);
@@ -1663,11 +1732,16 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                EBBACK;
            } else {
                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-               MARKLOCK;
-               LDAXRB(x1, wback);
-               MVNw_REG(x1, x1);
-               STLXRB(x3, x1, wback);
-               CBNZx_MARKLOCK(x3);
+               if(arm64_atomics) {
+                   MOV32w(x1, 0xff);
+                   STEORLB(x1, wback);
+               } else {
+                   MARKLOCK;
+                   LDAXRB(x1, wback);
+                   MVNw_REG(x1, x1);
+                   STLXRB(x3, x1, wback);
+                   CBNZx_MARKLOCK(x3);
+               }
                SMDMB();
            }
            break;
@@ -1764,30 +1838,38 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    MOV32w(x3, 1);
                    UFLAG_IF {
                        LDADDALxw(x3, x1, wback);
-                       emit_inc32(dyn, ninst, rex, x1, x3, x4);
                    } else {
                        STADDLxw(x3, wback);
                    }
                } else {
                    MARKLOCK;
                    LDAXRxw(x1, wback);
-                   emit_inc32(dyn, ninst, rex, x1, x3, x4);
-                   STLXRxw(x3, x1, wback);
+                   ADDxw_U12(x4, x1, 1);
+                   STLXRxw(x3, x4, wback);
                    CBNZx_MARKLOCK(x3);
                }
                SMDMB();
                if(!ALIGNED_ATOMICxw) {
-                   B_NEXT_nocond;
+                   UFLAG_IF {
+                       B_MARK2_nocond;
+                   } else {
+                       B_NEXT_nocond;
+                   }
                    MARK;
                    LDRxw_U12(x1, wback, 0);
                    LDAXRB(x4, wback);
-                   BFIxw(x1, x4, 0, 8);    // re-inject
-                   emit_inc32(dyn, ninst, rex, x1, x3, x4);
-                   STLXRB(x3, x1, wback);
+                   SUBxw_UXTB(x4, x4, x1);
+                   CBNZw_MARK(x4);
+                   ADDxw_U12(x4, x1, 1);
+                   STLXRB(x3, x4, wback);
                    CBNZw_MARK(x3);
-                   STRxw_U12(x1, wback, 0);
+                   STRxw_U12(x4, wback, 0);
                    SMDMB();
                }
+               UFLAG_IF {
+                   MARK2;
+                   emit_inc32(dyn, ninst, rex, x1, x3, x4);
+               }
            }
            break;
        case 1: //DEC Ed
@@ -1812,30 +1894,38 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    MOV64xw(x3, -1);
                    UFLAG_IF {
                        LDADDALxw(x3, x1, wback);
-                       emit_dec32(dyn, ninst, rex, x1, x3, x4);
                    } else {
                        STADDLxw(x3, wback);
                    }
                } else {
                    MARKLOCK;
                    LDAXRxw(x1, wback);
-                   emit_dec32(dyn, ninst, rex, x1, x3, x4);
-                   STLXRxw(x3, x1, wback);
+                   SUBxw_U12(x4, x1, 1);
+                   STLXRxw(x3, x4, wback);
                    CBNZx_MARKLOCK(x3);
                }
                SMDMB();
                if(!ALIGNED_ATOMICxw) {
-                   B_NEXT_nocond;
+                   UFLAG_IF {
+                       B_MARK2_nocond;
+                   } else {
+                       B_NEXT_nocond;
+                   }
                    MARK;
                    LDRxw_U12(x1, wback, 0);
                    LDAXRB(x4, wback);
-                   BFIxw(x1, x4, 0, 8);    // re-inject
-                   emit_dec32(dyn, ninst, rex, x1, x3, x4);
-                   STLXRB(x3, x1, wback);
+                   SUBxw_UXTB(x4, x4, x1);
+                   CBNZw_MARK(x4);
+                   SUBxw_U12(x4, x1, 1);
+                   STLXRB(x3, x4, wback);
                    CBNZw_MARK(x3);
-                   STRxw_U12(x1, wback, 0);
+                   STRxw_U12(x4, wback, 0);
                    SMDMB();
                }
+               UFLAG_IF {
+                   MARK2;
+                   emit_dec32(dyn, ninst, rex, x1, x3, x4);
+               }
            }
            break;
        default:
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index fcaa38d7..19f432e0 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -796,6 +796,10 @@
 #define CBNZx_MARK2(reg)    \
     j64 = GETMARK2-(dyn->native_size); \
     CBNZx(reg, j64)
+// Branch to MARK2 if reg is not 0 (use j64)
+#define CBNZw_MARK2(reg)    \
+    j64 = GETMARK2-(dyn->native_size); \
+    CBNZw(reg, j64)
 #define CBNZxw_MARK2(reg)   \
     j64 = GETMARK2-(dyn->native_size); \
     CBNZxw(reg, j64)
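
As a side note on the new SUB_ext / SUBxw_UXTB emitter macro and the matching printer mask `f1001011001mmmmmoooiiinnnnnddddd`: the standalone snippet below is a host-side sanity check written for this page, not part of the box64 build. It reproduces the macro (with the sf argument passed as unsigned so the shift by 31 stays well defined, and relying on the same GCC/Clang binary-literal extension the header uses) and prints the word it would emit for `SUB x0, x1, w2, UXTB`, which comes out as 0xCB220020 and decodes under the new printer branch.

```c
#include <stdint.h>
#include <stdio.h>

/* Copy of the new SUB (extended register) encoder from arm64_emitter.h. */
#define SUB_ext(sf, op, S, Rm, option, imm3, Rn, Rd) \
    ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | 1<<21 | (Rm)<<16 | (option)<<13 | (imm3)<<10 | (Rn)<<5 | (Rd))

int main(void)
{
    /* SUBxw_UXTB with rex.w=1, Rd=x0, Rn=x1, Rm=w2: option 0b000 (UXTB), imm3 0 */
    uint32_t opcode = SUB_ext(1u, 1u, 0u, 2u, 0u, 0u, 1u, 0u);
    printf("SUB x0, x1, w2, UXTB -> 0x%08X\n", opcode);  /* expected: 0xCB220020 */
    return 0;
}
```

The extended-register UXTB form is what lets the emitted code compare the byte returned by LDAXRB against the low byte of the full-width read in a single instruction, which is the heart of the reworked unaligned path above.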