| author | ptitSeb <sebastien.chev@gmail.com> | 2023-10-17 18:25:16 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-10-17 18:25:16 +0200 |
| commit | 6ef2b22a3ce4aaf5df5069dab226c1eeab806b19 (patch) | |
| tree | f55bcee33bd10845d34de282f23cfeb1f7bb1507 /src | |
| parent | da19b2008a8e4afc75ec46a43453dcc93256d48e (diff) | |
| download | box64-6ef2b22a3ce4aaf5df5069dab226c1eeab806b19.tar.gz box64-6ef2b22a3ce4aaf5df5069dab226c1eeab806b19.zip | |
[ARM64_DYNAREC] Added support for BOX64_DYNAREC_ALIGNED_ATOMICS
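The option lets the dynarec assume that LOCK-prefixed memory accesses are naturally aligned, so it can skip emitting both the runtime alignment test (the TSTx_mask with "mask=3 or 7" in the hunks below) and the byte-wise unaligned fallback path. The new ALIGNED_ATOMICxw / ALIGNED_ATOMICH macros also treat a constant effective address as aligned when its low bits are clear; geted() now reports such lock'd constant addresses through fixedaddress for that purpose. A minimal, self-contained sketch of the alignment test that gets skipped (a hypothetical helper, not the dynarec macros themselves):

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical illustration of the test behind ALIGNED_ATOMICxw: a 32-bit
 * (rex.w==0) or 64-bit (rex.w==1) access is naturally aligned when the low
 * 2 or 3 address bits are clear ("mask=3 or 7" in the emitted TSTx_mask). */
static int is_aligned_for_access(uintptr_t addr, int rexw)
{
    uintptr_t mask = (1u << (2 + rexw)) - 1;   /* 3 for 32-bit, 7 for 64-bit */
    return (addr & mask) == 0;
}

int main(void)
{
    printf("%d\n", is_aligned_for_access(0x1000, 0)); /* 1: 4-byte aligned */
    printf("%d\n", is_aligned_for_access(0x1002, 0)); /* 0: needs fallback */
    printf("%d\n", is_aligned_for_access(0x1008, 1)); /* 1: 8-byte aligned */
    return 0;
}
```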
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 37 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66f0.c | 87 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f0.c | 262 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 12 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 3 |
| -rw-r--r-- | src/include/debug.h | 1 |
| -rw-r--r-- | src/main.c | 10 |
| -rw-r--r-- | src/tools/rcfile.c | 2 |
8 files changed, 250 insertions, 164 deletions
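Most of the touched opcode handlers follow the same shape: an `if(arm64_atomics)` branch picks between a single ARMv8.1 LSE instruction (SWPALxw, CASALxw, LDADDALH, ...) and a classic LDAXR/STLXR retry loop, and with this change both the alignment test and the byte-wise unaligned fallback are wrapped in `if(!ALIGNED_ATOMICxw)` / `if(!ALIGNED_ATOMICH)`. As a rough, stand-alone parallel (not box64 code) of the two aligned strategies: compiling the snippet below for armv8.1-a usually yields a SWPAL, while plain armv8-a falls back to an exclusive-load/store loop; the flag behaviour is an assumption about typical GCC/Clang output.

```c
#include <stdint.h>
#include <stdio.h>

/* Atomic exchange: the compiler chooses between an LSE SWPAL instruction
 * (ARMv8.1+) and an LDAXR/STLXR retry loop, mirroring the dynarec's
 * if(arm64_atomics) split for the aligned case. */
static uint32_t xchg32(uint32_t *p, uint32_t v)
{
    return __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST);
}

int main(void)
{
    uint32_t mem = 0x11111111u;
    uint32_t old = xchg32(&mem, 0x22222222u);
    printf("old=%08x mem=%08x\n", old, mem); /* old=11111111 mem=22222222 */
    return 0;
}
```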
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index f2c3cd60..49f0a757 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1079,28 +1079,37 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); + if(!ALIGNED_ATOMICxw) { + TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + } if(arm64_atomics) { SWPALxw(gd, gd, ed); - B_NEXT_nocond; + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + } } else { MARKLOCK; LDAXRxw(x1, ed); STLXRxw(x3, gd, ed); CBNZx_MARKLOCK(x3); - B_MARK2_nocond; + if(!ALIGNED_ATOMICxw) { + B_MARK2_nocond; + } + } + if(!ALIGNED_ATOMICxw) { + MARK; + LDRxw_U12(x1, ed, 0); + LDAXRB(x3, ed); + STLXRB(x3, gd, ed); + CBNZx_MARK(x3); + STRxw_U12(gd, ed, 0); + SMDMB(); + MARK2; + } + if(!ALIGNED_ATOMICxw || !arm64_atomics) { + MOVxw_REG(gd, x1); } - MARK; - SMDMB(); - LDRxw_U12(x1, ed, 0); - LDAXRB(x3, ed); - STLXRB(x3, gd, ed); - CBNZx_MARK(x3); - STRxw_U12(gd, ed, 0); - SMDMB(); - MARK2; - MOVxw_REG(gd, x1); } break; case 0x88: diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index df60a75f..3e8fb480 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -97,8 +97,10 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, gd, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 0); // mask=1 - B_MARK3(cNE); + if(!ALIGNED_ATOMICH) { + TSTx_mask(wback, 1, 0, 0); // mask=1 + B_MARK3(cNE); + } // Aligned version if(arm64_atomics) { MOVw_REG(x1, x6); @@ -113,18 +115,20 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n CBNZx_MARKLOCK(x4); // done } - B_MARK_nocond; - // Unaligned version - MARK3; - LDRH_U12(x1, wback, 0); - LDAXRB(x3, wback); // dummy read, to arm the write... - CMPSw_REG(x6, x1); - B_MARK(cNE); - // EAX == Ed - STLXRB(x4, gd, wback); - CBNZx_MARK3(x4); - STRH_U12(gd, wback, 0); - SMDMB(); + if(!ALIGNED_ATOMICH) { + B_MARK_nocond; + // Unaligned version + MARK3; + LDRH_U12(x1, wback, 0); + LDAXRB(x3, wback); // dummy read, to arm the write... + CMPSw_REG(x6, x1); + B_MARK(cNE); + // EAX == Ed + STLXRB(x4, gd, wback); + CBNZx_MARK3(x4); + STRH_U12(gd, wback, 0); + SMDMB(); + } } MARK; // Common part (and fallback for EAX != Ed) @@ -213,8 +217,10 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i32 = F16S; else i32 = F8S; MOV32w(x5, i32); - TSTx_mask(wback, 1, 0, 0); // mask=1 - B_MARK(cNE); + if(!ALIGNED_ATOMICH) { + TSTx_mask(wback, 1, 0, 0); // mask=1 + B_MARK(cNE); + } if(arm64_atomics) { LDADDALH(x5, x1, wback); UFLAG_IF { @@ -227,15 +233,18 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } - B_NEXT_nocond; - MARK; // unaligned! 
also, not enough - LDRH_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIw(x1, x4, 0, 8); // re-inject - emit_add16(dyn, ninst, x1, x5, x3, x4); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRH_U12(x1, wback, 0); // put the whole value + if(!ALIGNED_ATOMICH) { + B_NEXT_nocond; + MARK; // unaligned! also, not enough + LDRH_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIw(x1, x4, 0, 8); // re-inject + emit_add16(dyn, ninst, x1, x5, x3, x4); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRH_U12(x1, wback, 0); // put the whole value + SMDMB(); + } } break; case 1: //OR @@ -358,8 +367,10 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i32 = F16S; else i32 = F8S; MOV32w(x5, i32); - TSTx_mask(wback, 1, 0, 0); // mask=1 - B_MARK(cNE); + if(!ALIGNED_ATOMICH) { + TSTx_mask(wback, 1, 0, 0); // mask=1 + B_MARK(cNE); + } if(arm64_atomics) { NEGw_REG(x4, x5); UFLAG_IF { @@ -375,15 +386,18 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } - B_NEXT_nocond; - MARK; // unaligned! also, not enough - LDRH_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIw(x1, x4, 0, 8); // re-inject - emit_sub16(dyn, ninst, x1, x5, x3, x4); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRH_U12(x1, wback, 0); // put the whole value + if(!ALIGNED_ATOMICH) { + B_NEXT_nocond; + MARK; // unaligned! also, not enough + LDRH_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIw(x1, x4, 0, 8); // re-inject + emit_sub16(dyn, ninst, x1, x5, x3, x4); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRH_U12(x1, wback, 0); // put the whole value + SMDMB(); + } } break; case 6: //XOR @@ -431,7 +445,6 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; } - SMDMB(); break; case 0xFF: diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index b5e9fe3c..e5a8d097 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -234,7 +234,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETGB(x1); UBFXx(x6, xRAX, 0, 8); - SMDMB(); if(MODREG) { if(rex.rex) { wback = xRAX+(nextop&7)+(rex.b<<3); @@ -258,8 +257,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(arm64_atomics) { - MOVw_REG(x2, x6); - CASALB(x2, gd, wback); + UFLAG_IF { + MOVw_REG(x2, x6); + CASALB(x6, gd, wback); + emit_cmp8(dyn, ninst, x2, x6, x3, x4, x5); + } else { + CASALB(x6, gd, wback); + } + BFIx(xRAX, x6, 0, 8); } else { MARKLOCK; LDAXRB(x2, wback); @@ -270,11 +275,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZx_MARKLOCK(x4); // done MARK; + UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);} + BFIx(xRAX, x2, 0, 8); } - UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);} - BFIx(xRAX, x2, 0, 8); } - SMDMB(); break; default: DEFAULT; @@ -300,8 +304,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_NEXT_nocond; } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK3(cNE); + if(!ALIGNED_ATOMICxw) { + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + 
B_MARK3(cNE); + } // Aligned version if(arm64_atomics) { UFLAG_IF { @@ -312,7 +318,9 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { CASALxw(xRAX, gd, wback); } - B_NEXT_nocond; + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + } } else { MARKLOCK; LDAXRxw(x1, wback); @@ -322,23 +330,29 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x4, gd, wback); CBNZx_MARKLOCK(x4); // done - B_MARK_nocond; + if(!ALIGNED_ATOMICxw) { + B_MARK_nocond; + } + } + if(!ALIGNED_ATOMICxw) { + // Unaligned version + MARK3; + LDRxw_U12(x1, wback, 0); + LDAXRB(x3, wback); // dummy read, to arm the write... + CMPSxw_REG(xRAX, x1); + B_MARK(cNE); + // EAX == Ed + STLXRB(x4, gd, wback); + CBNZx_MARK3(x4); + STRxw_U12(gd, wback, 0); + SMDMB(); + } + if(!ALIGNED_ATOMICxw || !arm64_atomics) { + MARK; + // Common part (and fallback for EAX != Ed) + UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} + MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what } - // Unaligned version - MARK3; - LDRxw_U12(x1, wback, 0); - LDAXRB(x3, wback); // dummy read, to arm the write... - CMPSxw_REG(xRAX, x1); - B_MARK(cNE); - // EAX == Ed - STLXRB(x4, gd, wback); - CBNZx_MARK3(x4); - STRxw_U12(gd, wback, 0); - SMDMB(); - MARK; - // Common part (and fallback for EAX != Ed) - UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} - MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what } break; default: @@ -413,8 +427,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_add32(dyn, ninst, rex, ed, gd, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned + if(!ALIGNED_ATOMICxw) { + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + } if(arm64_atomics) { UFLAG_IF { MOVxw_REG(x3, gd); @@ -423,30 +439,38 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { LDADDALxw(gd, gd, wback); } - B_NEXT_nocond; + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + } } else { MARKLOCK; LDAXRxw(x1, wback); ADDxw_REG(x4, x1, gd); STLXRxw(x3, x4, wback); CBNZx_MARKLOCK(x3); - B_MARK2_nocond; + if(!ALIGNED_ATOMICxw) { + B_MARK2_nocond; + } } - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); - ADDxw_REG(x4, x1, gd); - STLXRB(x3, x4, wback); - CBNZx_MARK(x3); - STRxw_U12(x4, wback, 0); - SMDMB(); - MARK2; - IFX(X_ALL|X_PEND) { - MOVxw_REG(x2, x1); - emit_add32(dyn, ninst, rex, x2, gd, x3, x4); + if(!ALIGNED_ATOMICxw) { + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); + ADDxw_REG(x4, x1, gd); + STLXRB(x3, x4, wback); + CBNZx_MARK(x3); + STRxw_U12(x4, wback, 0); + SMDMB(); + } + if(!ALIGNED_ATOMICxw || !arm64_atomics) { + MARK2; + IFX(X_ALL|X_PEND) { + MOVxw_REG(x2, x1); + emit_add32(dyn, ninst, rex, x2, gd, x3, x4); + } + MOVxw_REG(gd, x1); } - MOVxw_REG(gd, x1); } break; default: @@ -852,8 +876,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); + if(!ALIGNED_ATOMICxw) { + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + } 
if(arm64_atomics) { MOV64xw(x3, i64); UFLAG_IF { @@ -869,16 +895,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - B_NEXT_nocond; - MARK; // unaligned! also, not enough - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRxw_U12(x1, wback, 0); // put the whole value - SMDMB(); + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + MARK; // unaligned! also, not enough + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRxw_U12(x1, wback, 0); // put the whole value + SMDMB(); + } } break; case 1: //OR @@ -990,8 +1018,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); + if(!ALIGNED_ATOMICxw) { + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + } if(arm64_atomics) { MOV64xw(x5, -i64); UFLAG_IF { @@ -1008,16 +1038,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - B_NEXT_nocond; - MARK; // unaligned! also, not enough - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); - STLXRB(x3, x1, wback); - CBNZx_MARK(x3); - STRxw_U12(x1, wback, 0); // put the whole value - SMDMB(); + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + MARK; // unaligned! 
also, not enough + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); + STLXRB(x3, x1, wback); + CBNZx_MARK(x3); + STRxw_U12(x1, wback, 0); // put the whole value + SMDMB(); + } } break; case 6: //XOR @@ -1084,7 +1116,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(gb1, x1, gb2, 8); BFIx(eb1, x4, eb2, 8); } else { - SMDMB(); GETGB(x4); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(arm64_atomics) { @@ -1096,7 +1127,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // do the swap 14 -> strb(ed), 1 -> gd STLXRB(x3, x4, ed); CBNZx_MARKLOCK(x3); - SMDMB(); } BFIx(gb1, x1, gb2, 8); } @@ -1114,27 +1144,37 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETGD; SMDMB(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); + if(!ALIGNED_ATOMICxw) { + TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); + } if(arm64_atomics) { SWPALxw(gd, gd, ed); - B_NEXT_nocond; + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + } } else { MARKLOCK; LDAXRxw(x1, ed); STLXRxw(x3, gd, ed); CBNZx_MARKLOCK(x3); - B_MARK2_nocond; + if(!ALIGNED_ATOMICxw) { + B_MARK2_nocond; + } + } + if(!ALIGNED_ATOMICxw) { + MARK; + LDRxw_U12(x1, ed, 0); + LDAXRB(x3, ed); + STLXRB(x3, gd, ed); + CBNZx_MARK(x3); + STRxw_U12(gd, ed, 0); + SMDMB(); + MARK2; + } + if(!ALIGNED_ATOMICxw || !arm64_atomics) { + MOVxw_REG(gd, x1); } - MARK; - LDRxw_U12(x1, ed, 0); - LDAXRB(x3, ed); - STLXRB(x3, gd, ed); - CBNZx_MARK(x3); - STRxw_U12(gd, ed, 0); - MARK2; - SMDMB(); - MOVxw_REG(gd, x1); } break; @@ -1248,8 +1288,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_inc32(dyn, ninst, rex, ed, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned + if(!ALIGNED_ATOMICxw) { + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + } if(arm64_atomics) { MOV32w(x3, 1); UFLAG_IF { @@ -1265,16 +1307,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - B_NEXT_nocond; - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_inc32(dyn, ninst, rex, x1, x3, x4); - STLXRB(x3, x1, wback); - CBNZw_MARK(x3); - STRxw_U12(x1, wback, 0); - SMDMB(); + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_inc32(dyn, ninst, rex, x1, x3, x4); + STLXRB(x3, x1, wback); + CBNZw_MARK(x3); + STRxw_U12(x1, wback, 0); + SMDMB(); + } } break; case 1: //DEC Ed @@ -1286,9 +1330,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_dec32(dyn, ninst, rex, ed, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned - MARKLOCK; + if(!ALIGNED_ATOMICxw) { + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + } if(arm64_atomics) { MOV64xw(x3, -1); UFLAG_IF { @@ -1298,21 +1343,24 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, 
uintptr_t addr, uintptr_t ip, int nin STADDLxw(x3, wback); } } else { + MARKLOCK; LDAXRxw(x1, wback); emit_dec32(dyn, ninst, rex, x1, x3, x4); STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - B_NEXT_nocond; - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); // re-inject - emit_dec32(dyn, ninst, rex, x1, x3, x4); - STLXRB(x3, x1, wback); - CBNZw_MARK(x3); - STRxw_U12(x1, wback, 0); - SMDMB(); + if(!ALIGNED_ATOMICxw) { + B_NEXT_nocond; + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); // re-inject + emit_dec32(dyn, ninst, rex, x1, x3, x4); + STLXRB(x3, x1, wback); + CBNZw_MARK(x3); + STRxw_U12(x1, wback, 0); + SMDMB(); + } } break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 2b0a28e0..59f6c4f4 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -69,7 +69,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u } } else { switch(lock) { - case 1: addLockAddress(tmp); break; + case 1: addLockAddress(tmp); if(fixaddress) *fixaddress=tmp; break; case 2: if(isLockAddress(tmp)) *l=1; break; } MOV64x(ret, tmp); @@ -106,7 +106,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u ADDx_REG(ret, ret, xRIP); } switch(lock) { - case 1: addLockAddress(addr+delta+tmp); break; + case 1: addLockAddress(addr+delta+tmp); if(fixaddress) *fixaddress=addr+delta+tmp; break; case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break; } } else { @@ -126,7 +126,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u i64 = F8S; if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask)) || (unscaled && (i64>-256) && (i64<256))) { *fixaddress = i64; - if(unscaled && (i64>-256) && (i64<256)) + if(unscaled && i64 && (i64>-256) && (i64<256)) *unscaled = 1; if((nextop&7)==4) { if (sib_reg!=4) { @@ -221,7 +221,7 @@ static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t } } else { switch(lock) { - case 1: addLockAddress((int32_t)tmp); break; + case 1: addLockAddress((int32_t)tmp); if(fixaddress) *fixaddress=(int32_t)tmp; break; case 2: if(isLockAddress((int32_t)tmp)) *l=1; break; } MOV32w(ret, tmp); @@ -237,7 +237,7 @@ static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t uint64_t tmp = F32; MOV32w(ret, tmp); switch(lock) { - case 1: addLockAddress(tmp); break; + case 1: addLockAddress(tmp); if(fixaddress) *fixaddress=tmp; break; case 2: if(isLockAddress(tmp)) *l=1; break; } } else { @@ -260,7 +260,7 @@ static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t i32 = F8S; if(i32==0 || ((i32>=absmin) && (i32<=absmax) && !(i32&mask)) || (unscaled && (i32>-256) && (i32<256))) { *fixaddress = i32; - if(unscaled && (i32>-256) && (i32<256)) + if(unscaled && i32 && (i32>-256) && (i32<256)) *unscaled = 1; if((nextop&7)==4) { if (sib_reg!=4) { diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 6a457ec0..a5c9170b 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -481,6 +481,9 @@ TSTw_mask(xFlags, 0b010110, 0); \ CNEGx(r, r, cNE) +#define ALIGNED_ATOMICxw ((fixedaddress && !(fixedaddress&((1<<(2+rex.w)-1)))) || box64_dynarec_aligned_atomics) +#define ALIGNED_ATOMICH ((fixedaddress && !(fixedaddress&1)) || box64_dynarec_aligned_atomics) + // CALL will use x7 for the call address. 
Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 #define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0) diff --git a/src/include/debug.h b/src/include/debug.h index 6c1ed419..c21ff7ec 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -29,6 +29,7 @@ extern int box64_dynarec_fastpage; extern int box64_dynarec_wait; extern int box64_dynarec_test; extern int box64_dynarec_missing; +extern int box64_dynarec_aligned_atomics; #ifdef ARM64 extern int arm64_asimd; extern int arm64_aes; diff --git a/src/main.c b/src/main.c index a66e743b..cdbff19c 100644 --- a/src/main.c +++ b/src/main.c @@ -66,6 +66,7 @@ int box64_dynarec_jvm = 1; int box64_dynarec_wait = 1; int box64_dynarec_test = 0; int box64_dynarec_missing = 0; +int box64_dynarec_aligned_atomics = 0; uintptr_t box64_nodynarec_start = 0; uintptr_t box64_nodynarec_end = 0; #ifdef ARM64 @@ -637,6 +638,15 @@ void LoadLogEnv() if(box64_dynarec_fastpage) printf_log(LOG_INFO, "Dynarec will use Fast HotPage\n"); } + p = getenv("BOX64_DYNAREC_ALIGNED_ATOMICS"); + if(p) { + if(strlen(p)==1) { + if(p[0]>='0' && p[0]<='1') + box64_dynarec_aligned_atomics = p[0]-'0'; + } + if(box64_dynarec_aligned_atomics) + printf_log(LOG_INFO, "Dynarec will generate only aligned atomics code\n"); + } p = getenv("BOX64_DYNAREC_MISSING"); if(p) { if(strlen(p)==1) { diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index 98b0d4cb..098d2616 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -142,6 +142,7 @@ ENTRYBOOL(BOX64_DYNAREC_BLEEDING_EDGE, box64_dynarec_bleeding_edge) \ ENTRYBOOL(BOX64_DYNAREC_JVM, box64_dynarec_jvm) \ ENTRYINT(BOX64_DYNAREC_HOTPAGE, box64_dynarec_hotpage, 0, 255, 8) \ ENTRYBOOL(BOX64_DYNAREC_FASTPAGE, box64_dynarec_fastpage) \ +ENTRYBOOL(BOX64_DYNAREC_ALIGNED_ATOMICS, box64_dynarec_aligned_atomics) \ ENTRYBOOL(BOX64_DYNAREC_WAIT, box64_dynarec_wait) \ ENTRYSTRING_(BOX64_NODYNAREC, box64_nodynarec) \ ENTRYBOOL(BOX64_DYNAREC_TEST, box64_dynarec_test) \ @@ -164,6 +165,7 @@ IGNORE(BOX64_DYNAREC_BLEEDING_EDGE) \ IGNORE(BOX64_DYNAREC_JVM) \ IGNORE(BOX64_DYNAREC_HOTPAGE) \ IGNORE(BOX64_DYNAREC_FASTPAGE) \ +IGNORE(BOX64_DYNAREC_ALIGNED_ATOMICS) \ IGNORE(BOX64_DYNAREC_WAIT) \ IGNORE(BOX64_NODYNAREC) \ IGNORE(BOX64_DYNAREC_TEST) \ |
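As a usage example, the new toggle is wired like the other BOX64_DYNAREC_* booleans (src/main.c parses the environment variable, src/tools/rcfile.c accepts it in rc files); the process-section name below is only a placeholder:

```
# environment
BOX64_DYNAREC_ALIGNED_ATOMICS=1 box64 ./some_x86_64_app

# box64rc entry (per-process section; the name is illustrative)
[some_x86_64_app]
BOX64_DYNAREC_ALIGNED_ATOMICS=1
```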