| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-11-19 16:39:57 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-19 09:39:57 +0100 |
| commit | 127d273ada8bd26c05a4778db5e0f10b9e627621 (patch) | |
| tree | 32e6e4f157ca5fba1d80690fabcb44c127d39539 /src | |
| parent | 469d4f81eb1ec9bb6b5919de15e266f1bbc9a388 (diff) | |
[DYNAREC] Reworked strong memory emulation (#2043)
* [ARM64_DYNAREC] Reworked strong memory emulation
* Simplify
* [RV64,LA64_DYNAREC] Reworked strong memory emulation
* forgot this
* more tweaks
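The new header comments in dynarec_arm64_helper.h, dynarec_la64_helper.h and dynarec_rv64_helper.h (see the diff below) define a SEQ and four barrier-placement rules keyed to the box64_dynarec_strongmem level: a barrier at the start of a SEQ, right before its last store, after every third store, and at its end. As a rough illustration of what those rules mean for the emitted code, here is a standalone sketch; `emit_store()` and `emit_barrier()` are hypothetical stand-ins, not box64 functions, and the loop only approximates the real macros.

```c
#include <stdio.h>

/* Hypothetical stand-ins for the real emitters (DMB_ISH on ARM64,
 * DBAR(0) on LoongArch64, FENCE_RW_RW on RV64). */
static void emit_store(int i)  { printf("  store #%d\n", i); }
static void emit_barrier(void) { printf("  barrier\n"); }

/* Where barriers land around one SEQ of scalar guest stores, following the
 * rules in the new header comment:
 *   b1: start of the SEQ             (level >= 1, WILLWRITE)
 *   c1: right before the last store  (level >= 2, last_write)
 *   d : after every third store      (level >= 3, SMWRITE)
 *   a1: end of the SEQ               (level >= 1, SMEND)      */
static void translate_seq(int strongmem, int nstores)
{
    int writes = 0;
    printf("STRONGMEM=%d, %d stores:\n", strongmem, nstores);
    for (int i = 0; i < nstores; i++) {
        if (i == 0 && strongmem >= 1)
            emit_barrier();                      /* b1 */
        else if (i == nstores - 1 && strongmem >= 2)
            emit_barrier();                      /* c1 */
        emit_store(i);
        if (strongmem >= 3 && ++writes >= 3) {   /* d */
            emit_barrier();
            writes = 1;
        }
    }
    if (nstores && strongmem >= 1)
        emit_barrier();                          /* a1 */
}

int main(void)
{
    for (int level = 0; level <= 3; level++)
        translate_seq(level, 5);
    return 0;
}
```

SIMD stores follow the same pattern but only start counting as writes at level 2 (STRONGMEM_SIMD_WRITE), which is what SMWRITE2 and will_write == 2 track in the diff.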
Diffstat (limited to 'src')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 20 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66.c | 5 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 1 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 10 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 1 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 1 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f20f.c | 1 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 297 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 1 |
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 7 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 219 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 1 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 200 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 3 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 2 |
16 files changed, 571 insertions, 200 deletions
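Barrier placement is split across the dynarec passes: pass 1 (STEP == 1) only records will_write, last_write and lock on each instruction, and later passes consult those flags, both inside the helper macros and in the new hook added to dynarec_native_pass.c. A minimal sketch of that cooperation, assuming hypothetical emitter functions in place of the real barrier macros:

```c
#include <stdio.h>

/* Hypothetical emitters standing in for WILLWRITE()/WILLWRITELOCK(). */
static void emit_willwrite(void)      { printf("  barrier (start-of-SEQ or last write)\n"); }
static void emit_willwritelock(int l) { printf("  barrier (locked store, lock=%d)\n", l); }

typedef struct {
    unsigned char will_write; /* 1 = scalar store, 2 = SIMD store (set in pass 1) */
    unsigned char last_write; /* last guest store of its SEQ (set in pass 1)      */
    unsigned char lock;       /* store to a known lock address                    */
} inst_info_t;

/* Pass 1: SMEND() walks backwards from the end of a SEQ and tags its last store. */
static void pass1_smend(inst_info_t* insts, int ninst, int seq_had_write)
{
    int i = ninst;
    if (!seq_had_write) return;
    while (i >= 0 && !insts[i].will_write) --i;
    if (i >= 0) insts[i].last_write = 1;
}

/* Pass 2+: before each opcode body, native_pass checks the recorded flags and
 * emits the "will write" barrier (mirrors the new dynarec_native_pass.c hunk). */
static void pass2_before_opcode(const inst_info_t* insts, int ninst)
{
    if (insts[ninst].lock)
        emit_willwritelock(insts[ninst].lock);
    else if (insts[ninst].will_write)
        emit_willwrite();
}

int main(void)
{
    /* inst 0: scalar store, inst 1: locked store, inst 2: SIMD store, inst 3: no store */
    inst_info_t insts[4] = { { 1, 0, 0 }, { 1, 0, 1 }, { 2, 0, 0 }, { 0, 0, 0 } };
    pass1_smend(insts, 3, 1); /* the SEQ ends at inst 3, so inst 2 gets last_write */
    for (int i = 0; i < 4; i++) {
        printf("inst %d:\n", i);
        pass2_before_opcode(insts, i);
    }
    return 0;
}
```

This sketch elides the strongmem-level checks that the real WILLWRITE/WILLWRITELOCK perform; see the helper hunks below for the exact conditions.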
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 807c93cd..facfd3df 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -646,7 +646,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SKIPTEST(x1); dyn->doublepush = 0; } else { - WILLWRITE(); gd = xRAX+(opcode&0x07)+(rex.b<<3); u32 = PK(0); i32 = 1; @@ -730,7 +729,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x60: if(rex.is32bits) { INST_NAME("PUSHAD"); - WILLWRITE(); MOVw_REG(x1, xRSP); PUSH2_32(xRAX, xRCX); PUSH2_32(xRDX, xRBX); @@ -814,7 +812,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin LDRSW_U12(x1, x3, 0); PUSH1z(x1); } else { - WILLWRITE(); MOV64z(x3, i64); PUSH1z(x3); SMWRITE(); @@ -883,7 +880,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("PUSH Ib"); i64 = F8S; MOV64z(x3, i64); - WILLWRITE(); PUSH1z(x3); SMWRITE(); break; @@ -1292,7 +1288,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(eb1, gd, eb2*8, 8); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff, 0, rex, &lock, 0, 0); - WILLWRITELOCK(lock); STB(gd, ed, fixedaddress); SMWRITELOCK(lock); } @@ -1304,8 +1299,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0); - WILLWRITELOCK(lock); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0); STxw(gd, ed, fixedaddress); SMWRITELOCK(lock); } @@ -1521,8 +1515,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin else u64 = F64; MOV64z(x1, u64); - lock=isLockAddress(u64); - WILLWRITELOCK(lock); + lock = isLockAddress(u64); STRB_U12(xRAX, x1, 0); SMWRITELOCK(lock); break; @@ -1533,8 +1526,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin else u64 = F64; MOV64z(x1, u64); - lock=isLockAddress(u64); - WILLWRITELOCK(lock); + lock = isLockAddress(u64); STRxw_U12(xRAX, x1, 0); SMWRITELOCK(lock); break; @@ -1693,7 +1685,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_test32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5); break; case 0xAA: - WILLWRITE(); if(rep) { INST_NAME("REP STOSB"); CBZx_NEXT(xRCX); @@ -1718,7 +1709,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMWRITE(); break; case 0xAB: - WILLWRITE(); if(rep) { INST_NAME("REP STOSD"); CBZx_NEXT(xRCX); @@ -2248,7 +2238,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = x3; } else ed = xZR; - WILLWRITELOCK(lock); STB(ed, wback, fixedaddress); SMWRITELOCK(lock); } @@ -2268,7 +2257,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = x3; } else ed = xZR; - WILLWRITELOCK(lock); STxw(ed, wback, fixedaddress); SMWRITELOCK(lock); } @@ -2363,7 +2351,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin call_n(dyn, ninst, *(void**)(addr+8), tmp); addr+=8+8; } else { - WILLWRITE2(); GETIP(ip+1); // read the 0xCC STORE_XEMU_CALL(xRIP); ADDx_U12(x1, xEmu, 
(uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip @@ -3037,7 +3024,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { TABLE64(x2, addr); } - WILLWRITE2(); PUSH1(x2); MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", getBridgeName((void*)(dyn->insts[ninst].natcall-1))?:GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); SKIPTEST(x1); // disable test as this hack dos 2 instructions for 1 diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index 946716ed..0d2b69c6 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -711,7 +711,6 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x9C: INST_NAME("PUSHF"); READFLAGS(X_ALL); - WILLWRITE(); PUSH1_16(xFlags); SMWRITE(); break; @@ -752,8 +751,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin else u64 = F64; MOV64z(x1, u64); - if(isLockAddress(u64)) lock=1; else lock = 0; - WILLWRITELOCK(lock); + lock = isLockAddress(u64); STRH_U12(xRAX, x1, 0); SMWRITELOCK(lock); break; @@ -865,7 +863,6 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xAB: - WILLWRITE(); if(rep) { INST_NAME("REP STOSW"); CBZx_NEXT(xRCX); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 6bd25c4a..d2deeb3b 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -2869,7 +2869,6 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); FMOVD(v1, v0); } else { - WILLWRITE2(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VST64(v0, ed, fixedaddress); SMWRITE2(); diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index bce01f5c..f570f0a1 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -780,7 +780,6 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); FMOVD(v1, v0); } else { - WILLWRITE2(); addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VST64(v0, ed, fixedaddress); SMWRITE2(); @@ -889,8 +888,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(ed, gd, 0, 16); } } else { - addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, &lock, 0, 0); - WILLWRITELOCK(lock); + addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << 1, 1, rex, &lock, 0, 0); STH(gd, ed, fixedaddress); SMWRITELOCK(lock); } @@ -1102,7 +1100,6 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(eb1, gd, eb2*8, 8); } else { addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff, 0, rex, &lock, 0, 0); - WILLWRITELOCK(lock); STB(gd, ed, fixedaddress); SMWRITELOCK(lock); } @@ -1114,8 +1111,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg MOVxw_REG(xRAX+(nextop&7)+(rex.b<<3), gd); } else { // mem <= reg - addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0); - 
WILLWRITELOCK(lock); + addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0); STxw(gd, ed, fixedaddress); SMWRITELOCK(lock); } @@ -1348,7 +1344,6 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = x3; } else ed = xZR; - WILLWRITELOCK(lock); STB(ed, wback, fixedaddress); SMWRITELOCK(lock); } @@ -1364,7 +1359,6 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4); i64 = F32S; MOV64xw(x3, i64); - WILLWRITELOCK(lock); STxw(x3, ed, fixedaddress); SMWRITELOCK(lock); } diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index 15a938bd..2c5947cf 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -558,7 +558,6 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(MODREG) { v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1); } else { - WILLWRITE2(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); unscaled = 0; v1 = fpu_get_scratch(dyn, ninst); @@ -603,7 +602,6 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(MODREG) { v1 = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), 1); } else { - WILLWRITE2(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); unscaled = 0; v1 = fpu_get_scratch(dyn, ninst); diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c index 69a58b58..853f3207 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c @@ -490,7 +490,6 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(MODREG) { v1 = sse_get_reg_empty(dyn, ninst, x3, (nextop&7)+(rex.b<<3)); } else { - WILLWRITE2(); v1 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<(3+vex.l), vex.l?15:7, rex, NULL, 0, 1); } diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 992f4543..65dfb240 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -88,7 +88,6 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, if(v1!=v2) VMOVeD(v1, 1, v2, 1); YMM0((nextop&7)+(rex.b<<3)); } else { - WILLWRITE2(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VST64(v0, ed, fixedaddress); SMWRITE2(); diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index 8c0cb3b9..6563aea1 100644 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -71,7 +71,6 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n d0 = sse_get_reg(dyn, ninst, x1, ed, 1); VMOVeD(d0, 0, v0, 0); } else { - WILLWRITE2(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VST64(v0, ed, fixedaddress); SMWRITE2(); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 5e297221..e76aac02 100644 --- 
a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -36,64 +36,176 @@ #define FEMIT(A) EMIT(A) #endif -// Strong mem emulation helpers -#define SMREAD_VAL 4 -#define SMWRITE2_MIN 1 -#define SMFIRST_MIN 1 -#define SMSEQ_MIN 2 -#define SMSEQ_MAX 3 +/* Box64 Strong Memory Model Emulation + * + * Definition of a SEQ: + * A SEQ is a sequence of opcodes that writes to guest memory, terminated by JMP, RET, CALL, etc. + * + * Memory barriers are added in the following cases to emulate the strong memory model: + * 1. End of a SEQ: + * - Scalar operations (a1) + * - SIMD operations (a2) + * 2. Start of a SEQ: + * - Scalar operations (b1) + * - SIMD operations (b2) + * 3. Right before the last guest memory store in a SEQ: + * - Scalar operations (c1) + * - SIMD operations (c2) + * 4. After every third guest memory store in a SEQ (d) + * + * STRONGMEM levels: + * LEVEL1: Includes a1, b1 + * LEVEL2: Includes LEVEL1, plus a2, b2, c1, c2 + * LEVEL3: Includes LEVEL2, plus d + */ + +#define STRONGMEM_SIMD_WRITE 2 // The level of SIMD memory writes will be tracked +#define STRONGMEM_LAST_WRITE 2 // The level of a barrier before the last guest memory store will be put +#define STRONGMEM_SEQ_WRITE 3 // The level of a barrier at every third memory store will be put + #if STEP == 1 -// pass 1 has the jump point available -#define SMWRITE() dyn->insts[ninst].will_write = 1; dyn->smwrite = 1 + +#define SMWRITE() \ + do { \ + /* Mark that current sequence writes to guest memory. */ \ + /* This will be used in SMEND for last_write. */ \ + dyn->smwrite = 1; \ + /* Mark that current opcode writes to guest memory. */ \ + dyn->insts[ninst].will_write = 1; \ + } while (0) + +#define SMWRITELOCK(lock) \ + do { \ + dyn->insts[ninst].lock = lock; \ + SMWRITE(); \ + } while (0) + +#define SMWRITE2() \ + do { \ + if (box64_dynarec_strongmem >= STRONGMEM_SIMD_WRITE) { \ + dyn->smwrite = 1; \ + dyn->insts[ninst].will_write = 2; \ + } \ + } while (0) + #define SMREAD() #define SMREADLOCK(lock) -#define SMMIGHTREAD() -#define WILLWRITE2() if(box64_dynarec_strongmem>SMWRITE2_MIN) {WILLWRITE();} -#define SMWRITE2() if(box64_dynarec_strongmem>SMWRITE2_MIN) {SMWRITE();} -#define SMWRITELOCK(lock) SMWRITE() -#define WILLWRITELOCK(lock) #define WILLWRITE() -#define SMMIGHTWRITE() if(!MODREG) {SMWRITE();} -#define SMSTART() dyn->smwrite = 0; dyn->smread = 0; -#define SMEND() if(dyn->smwrite && (box64_dynarec_strongmem>SMFIRST_MIN)) {int i = ninst; while(i>=0 && !dyn->insts[i].will_write) --i; if(i>=0) {dyn->insts[i].last_write = 1;}} dyn->smwrite = 0 +#define WILLWRITELOCK(lock) + +#define SMSTART() \ + do { \ + /* Clear current state at the start of a potential SEQ. */ \ + dyn->smwrite = 0; \ + } while (0) + +#define SMEND() \ + do { \ + /* If there is any guest memory write, which is a SEQ, then compute the last_write. 
*/ \ + if (dyn->smwrite && (box64_dynarec_strongmem >= STRONGMEM_LAST_WRITE)) { \ + int i = ninst; \ + while (i >= 0 && !dyn->insts[i].will_write) \ + --i; \ + if (i >= 0) { dyn->insts[i].last_write = 1; } \ + } \ + dyn->smwrite = 0; \ + } while (0) + #define SMDMB() + #else -// Sequence of Write will trigger a DMB on "last" write if strongmem is >= 1 -// Block will trigget at 1st and last if strongmem is >= SMFIRST_MIN -// Read will contribute to trigger a DMB on "first" read if strongmem is >= SMREAD_MIN -// Opcode will read -#define SMREAD() if(dyn->insts[ninst].will_write) {WILLWRITE();} else if(box64_dynarec_strongmem==SMREAD_VAL && !dyn->smread) {DSB_SY(); dyn->smread = 1;} -// Opcode will read with option forced lock -#define SMREADLOCK(lock) if((lock)) {SMWRITELOCK(lock);} else {SMREAD();} -// Opcode might read (depend on nextop) -#define SMMIGHTREAD() if(!MODREG) {SMREAD();} -// Opcode has wrote -#define SMWRITE() if((box64_dynarec_strongmem>=SMFIRST_MIN) && dyn->smwrite==0 && (box64_dynarec_strongmem!=SMREAD_VAL)) {SMDMB();} if(box64_dynarec_strongmem>SMSEQ_MIN && (box64_dynarec_strongmem!=SMREAD_VAL)) {if(++dyn->smwrite>=SMSEQ_MAX) {SMDMB(); dyn->smwrite=1;}} else dyn->smwrite=1 -// Opcode has wrote (strongmem>1 only) -#define WILLWRITE2() if(box64_dynarec_strongmem>SMWRITE2_MIN) {WILLWRITE();} -#define SMWRITE2() if(box64_dynarec_strongmem>SMWRITE2_MIN) {SMWRITE();} -// Opcode has wrote with option forced lock -#define SMWRITELOCK(lock) if(lock) {SMDMB(); dyn->smwrite=1;} else {SMWRITE();} -// Opcode has wrote with option forced lock -#define WILLWRITELOCK(lock) if(lock) {DMB_ISH();} else {WILLWRITE();} -// Opcode might have wrote (depend on nextop) -#define SMMIGHTWRITE() if(!MODREG) {SMWRITE();} -// Opcode will write (without reading) -#define WILLWRITE() if((box64_dynarec_strongmem>=SMFIRST_MIN) && dyn->smwrite==0 && (box64_dynarec_strongmem!=SMREAD_VAL)) {SMDMB();} else if(box64_dynarec_strongmem>=SMFIRST_MIN && dyn->insts[ninst].last_write && (box64_dynarec_strongmem!=SMREAD_VAL)) {SMDMB();} dyn->smwrite=1 -// Start of sequence -#define SMSTART() SMEND() -// End of sequence -#define SMEND() if(dyn->smwrite && box64_dynarec_strongmem && (box64_dynarec_strongmem!=SMREAD_VAL)) {DMB_ISH();} dyn->smwrite=0; dyn->smread=0 -// Force a Data memory barrier (for LOCK: prefix) -#define SMDMB() \ - if (box64_dynarec_strongmem && !box64_dynarec_weakbarrier) { \ - DSB_ISH(); \ - } else { \ - DMB_ISH(); \ - } \ - dyn->smwrite = 0; \ - dyn->smread = 0 +// An opcode writes guest memory, this need to be put after the STORE instruction manually. +#define SMWRITE() \ + do { \ + /* Put a barrier at every third memory write. */ \ + if (box64_dynarec_strongmem >= STRONGMEM_SEQ_WRITE) { \ + if (++dyn->smwrite >= 3 /* Every third memory write */) { \ + DMB_ISH(); \ + dyn->smwrite = 1; \ + } \ + } else { \ + /* Mark that current sequence writes to guest memory. */ \ + dyn->smwrite = 1; \ + } \ + } while (0) + +// Similar to SMWRITE, but checks lock. +#define SMWRITELOCK(lock) \ + do { \ + if (lock) { \ + DMB_ISH(); \ + } else { \ + SMWRITE(); \ + } \ + } while (0) + +// Similar to SMWRITE, but for SIMD instructions. +#define SMWRITE2() \ + do { \ + if (box64_dynarec_strongmem >= STRONGMEM_SIMD_WRITE) \ + SMWRITE(); \ + } while (0) + +// An opcode reads guest memory, this need to be put before the LOAD instruction manually. +#define SMREAD() + +// Similar to SMREAD, but checks lock. 
+#define SMREADLOCK(lock) \ + do { \ + if (lock) { \ + DMB_ISH(); \ + } else { \ + SMREAD(); \ + } \ + } while (0) + +// An opcode will write memory, this will be put before the STORE instruction automatically. +#define WILLWRITE() \ + do { \ + if (box64_dynarec_strongmem >= dyn->insts[ninst].will_write && dyn->smwrite == 0) { \ + /* Will write but never written, this is the start of a SEQ, put a barrier. */ \ + DMB_ISH(); \ + } else if (box64_dynarec_strongmem >= STRONGMEM_LAST_WRITE && dyn->insts[ninst].last_write) { \ + /* Last write, put a barrier */ \ + DMB_ISH(); \ + } \ + } while (0) + +// Similar to WILLWRITE, but checks lock. +#define WILLWRITELOCK(lock) \ + do { \ + if (lock) { \ + DMB_ISH(); \ + } else { \ + WILLWRITE(); \ + } \ + } while (0) + +// Used to clear the state at the start of a SEQ +#define SMSTART() \ + do { \ + dyn->smwrite = 0; \ + } while (0) + +// Will be put at the end of the SEQ +#define SMEND() \ + do { \ + if (box64_dynarec_strongmem) { \ + /* Check if there is any guest memory write. */ \ + int i = ninst; \ + while (i >= 0 && !dyn->insts[i].will_write) \ + --i; \ + if (i >= 0) { \ + /* It's a SEQ, put a barrier here. */ \ + DMB_ISH(); \ + } \ + } \ + dyn->smwrite = 0; \ + } while (0) + +// The barrier. +#define SMDMB() DMB_ISH() #endif //LOCK_* define @@ -619,36 +731,35 @@ vy = ymm_get_reg_empty(dyn, ninst, x1, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1) // Get EX as a quad, (x3 is used) -#define GETEX_Y(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x3, (nextop&7)+(rex.b<<3), w); \ - } else { \ - if(w) {WILLWRITE2();} else {SMREAD();} \ - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D); \ - unscaled = 0; \ - a = fpu_get_scratch(dyn, ninst); \ - VLDR128_U12(a, ed, fixedaddress); \ +#define GETEX_Y(a, w, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x3, (nextop & 7) + (rex.b << 3), w); \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe << 4, 15, rex, NULL, 0, D); \ + unscaled = 0; \ + a = fpu_get_scratch(dyn, ninst); \ + VLDR128_U12(a, ed, fixedaddress); \ } // Get EX as a quad, (x3 is used) #define GETEX_empty_Y(a, D) \ if(MODREG) { \ a = sse_get_reg_empty(dyn, ninst, x3, (nextop&7)+(rex.b<<3)); \ } else { \ - WILLWRITE2(); \ a = fpu_get_scratch(dyn, ninst); \ addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D); \ unscaled = 0; \ } // Get EX as a quad, (x1 is used) -#define GETEX(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ - } else { \ - if(w) {WILLWRITE2();} else {SMREAD();} \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, D); \ - a = fpu_get_scratch(dyn, ninst); \ - VLD128(a, ed, fixedaddress); \ +#define GETEX(a, w, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w); \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff << 4, 15, rex, NULL, 0, D); \ + a = fpu_get_scratch(dyn, ninst); \ + VLD128(a, ed, fixedaddress); \ } // Put Back EX if it was a memory and not an emm register @@ -660,42 +771,42 @@ // Get Ex as a double, not a quad (warning, x1 get used) -#define GETEXSD(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ - } else { \ - if(w) {WILLWRITE2();} else {SMREAD();} \ - a = fpu_get_scratch(dyn, ninst); \ - addr = geted(dyn, addr, 
ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, D); \ - VLD64(a, ed, fixedaddress); \ +#define GETEXSD(a, w, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w); \ + } else { \ + SMREAD(); \ + a = fpu_get_scratch(dyn, ninst); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff << 3, 7, rex, NULL, 0, D); \ + VLD64(a, ed, fixedaddress); \ } // Get Ex as 64bits, not a quad (warning, x1 get used) #define GETEX64(a, w, D) GETEXSD(a, w, D) // Get Ex as a single, not a quad (warning, x1 get used) -#define GETEXSS(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ - } else { \ - if(w) {WILLWRITE2();} else {SMREAD();} \ - a = fpu_get_scratch(dyn, ninst); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, D); \ - VLD32(a, ed, fixedaddress); \ +#define GETEXSS(a, w, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w); \ + } else { \ + SMREAD(); \ + a = fpu_get_scratch(dyn, ninst); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff << 2, 3, rex, NULL, 0, D); \ + VLD32(a, ed, fixedaddress); \ } // Get Ex as 32bits, not a quad (warning, x1 get used) #define GETEX32(a, w, D) GETEXSS(a, w, D) // Get Ex as 16bits, not a quad (warning, x1 get used) -#define GETEX16(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ - } else { \ - if(w) {WILLWRITE2();} else {SMREAD();} \ - a = fpu_get_scratch(dyn, ninst); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, D); \ - VLD16(a, ed, fixedaddress); \ +#define GETEX16(a, w, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w); \ + } else { \ + SMREAD(); \ + a = fpu_get_scratch(dyn, ninst); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff << 1, 1, rex, NULL, 0, D); \ + VLD16(a, ed, fixedaddress); \ } // Get GM, might use x1, x2 and x3 diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 3b8cb1f3..295574b8 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -108,6 +108,7 @@ typedef struct instruction_arm64_s { uint8_t barrier_maybe; uint8_t will_write; uint8_t last_write; + uint8_t lock; uint8_t set_nat_flags; // 0 or combinaison of native flags define uint8_t use_nat_flags; // 0 or combinaison of native flags define uint8_t use_nat_flags_before; // 0 or combinaison of native flags define diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index dee5d496..5df1dce2 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -116,6 +116,13 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int GOTEST(x1, x2); } if(dyn->insts[ninst].pred_sz>1) {SMSTART();} + #if STEP > 1 + if (dyn->insts[ninst].lock) { + WILLWRITELOCK(dyn->insts[ninst].lock); + } else if (dyn->insts[ninst].will_write) { + WILLWRITE(); + } + #endif if((dyn->insts[ninst].x64.need_before&~X_PEND) && !dyn->insts[ninst].pred_sz) { READFLAGS(dyn->insts[ninst].x64.need_before&~X_PEND); } diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 21f3c2af..686a2a09 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ 
-32,50 +32,177 @@ #define PK64(a) *(uint64_t*)(addr + a) #define PKip(a) *(uint8_t*)(ip + a) -// Strong mem emulation helpers -#define SMREAD_MIN 2 -#define SMWRITE_MIN 1 -// Sequence of Read will trigger a DMB on "first" read if strongmem is >= SMREAD_MIN -// Sequence of Write will trigger a DMB on "last" write if strongmem is >= 1 -// All Write operation that might use a lock all have a memory barrier if strongmem is >= SMWRITE_MIN -// Opcode will read -#define SMREAD() \ - if ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN)) { \ - SMDMB(); \ - } else \ - dyn->smread = 1 -// Opcode will read with option forced lock +/* Box64 Strong Memory Model Emulation + * + * Definition of a SEQ: + * A SEQ is a sequence of opcodes that writes to guest memory, terminated by JMP, RET, CALL, etc. + * + * Memory barriers are added in the following cases to emulate the strong memory model: + * 1. End of a SEQ: + * - Scalar operations (a1) + * - SIMD operations (a2) + * 2. Start of a SEQ: + * - Scalar operations (b1) + * - SIMD operations (b2) + * 3. Right before the last guest memory store in a SEQ: + * - Scalar operations (c1) + * - SIMD operations (c2) + * 4. After every third guest memory store in a SEQ (d) + * + * STRONGMEM levels: + * LEVEL1: Includes a1, b1 + * LEVEL2: Includes LEVEL1, plus a2, b2, c1, c2 + * LEVEL3: Includes LEVEL2, plus d + */ + +#define STRONGMEM_SIMD_WRITE 2 // The level of SIMD memory writes will be tracked +#define STRONGMEM_LAST_WRITE 2 // The level of a barrier before the last guest memory store will be put +#define STRONGMEM_SEQ_WRITE 3 // The level of a barrier at every third memory store will be put + +#if STEP == 1 + +#define SMWRITE() \ + do { \ + /* Mark that current sequence writes to guest memory. */ \ + /* This will be used in SMEND for last_write. */ \ + dyn->smwrite = 1; \ + /* Mark that current opcode writes to guest memory. */ \ + dyn->insts[ninst].will_write = 1; \ + } while (0) + +#define SMWRITELOCK(lock) \ + do { \ + dyn->insts[ninst].lock = lock; \ + SMWRITE(); \ + } while (0) + +#define SMWRITE2() \ + do { \ + if (box64_dynarec_strongmem >= STRONGMEM_SIMD_WRITE) { \ + dyn->smwrite = 1; \ + dyn->insts[ninst].will_write = 2; \ + } \ + } while (0) + +#define SMREAD() +#define SMREADLOCK(lock) +#define WILLWRITE() +#define WILLWRITELOCK(lock) + +#define SMSTART() \ + do { \ + /* Clear current state at the start of a potential SEQ. */ \ + dyn->smwrite = 0; \ + } while (0) + +#define SMEND() \ + do { \ + /* If there is any guest memory write, which is a SEQ, then compute the last_write. */ \ + if (dyn->smwrite && (box64_dynarec_strongmem >= STRONGMEM_LAST_WRITE)) { \ + int i = ninst; \ + while (i >= 0 && !dyn->insts[i].will_write) \ + --i; \ + if (i >= 0) { dyn->insts[i].last_write = 1; } \ + } \ + dyn->smwrite = 0; \ + } while (0) + +#define SMDMB() + +#else + +// An opcode writes guest memory, this need to be put after the STORE instruction manually. +#define SMWRITE() \ + do { \ + /* Put a barrier at every third memory write. */ \ + if (box64_dynarec_strongmem >= STRONGMEM_SEQ_WRITE) { \ + if (++dyn->smwrite >= 3 /* Every third memory write */) { \ + DBAR(0); \ + dyn->smwrite = 1; \ + } \ + } else { \ + /* Mark that current sequence writes to guest memory. */ \ + dyn->smwrite = 1; \ + } \ + } while (0) + +// Similar to SMWRITE, but checks lock. +#define SMWRITELOCK(lock) \ + do { \ + if (lock) { \ + DBAR(0); \ + } else { \ + SMWRITE(); \ + } \ + } while (0) + +// Similar to SMWRITE, but for SIMD instructions. 
+#define SMWRITE2() \ + do { \ + if (box64_dynarec_strongmem >= STRONGMEM_SIMD_WRITE) \ + SMWRITE(); \ + } while (0) + +// An opcode reads guest memory, this need to be put before the LOAD instruction manually. +#define SMREAD() + +// Similar to SMREAD, but checks lock. #define SMREADLOCK(lock) \ - if ((lock) || ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN))) { SMDMB(); } -// Opcode might read (depend on nextop) -#define SMMIGHTREAD() \ - if (!MODREG) { SMREAD(); } -// Opcode has wrote -#define SMWRITE() dyn->smwrite = 1 -// Opcode has wrote (strongmem>1 only) -#define SMWRITE2() \ - if (box64_dynarec_strongmem > SMREAD_MIN) dyn->smwrite = 1 -// Opcode has wrote with option forced lock -#define SMWRITELOCK(lock) \ - if (lock || (box64_dynarec_strongmem > SMWRITE_MIN)) { \ - SMDMB(); \ - } else \ - dyn->smwrite = 1 -// Opcode might have wrote (depend on nextop) -#define SMMIGHTWRITE() \ - if (!MODREG) { SMWRITE(); } -// Start of sequence -#define SMSTART() SMEND() -// End of sequence -#define SMEND() \ - if (dyn->smwrite && box64_dynarec_strongmem) { DBAR(0); } \ - dyn->smwrite = 0; \ - dyn->smread = 0; -// Force a Data memory barrier (for LOCK: prefix) -#define SMDMB() \ - DBAR(0); \ - dyn->smwrite = 0; \ - dyn->smread = 1 + do { \ + if (lock) { \ + DBAR(0); \ + } else { \ + SMREAD(); \ + } \ + } while (0) + +// An opcode will write memory, this will be put before the STORE instruction automatically. +#define WILLWRITE() \ + do { \ + if (box64_dynarec_strongmem >= dyn->insts[ninst].will_write && dyn->smwrite == 0) { \ + /* Will write but never written, this is the start of a SEQ, put a barrier. */ \ + DBAR(0); \ + } else if (box64_dynarec_strongmem >= STRONGMEM_LAST_WRITE && dyn->insts[ninst].last_write) { \ + /* Last write, put a barrier */ \ + DBAR(0); \ + } \ + } while (0) + +// Similar to WILLWRITE, but checks lock. +#define WILLWRITELOCK(lock) \ + do { \ + if (lock) { \ + DBAR(0); \ + } else { \ + WILLWRITE(); \ + } \ + } while (0) + +// Used to clear the state at the start of a SEQ +#define SMSTART() \ + do { \ + dyn->smwrite = 0; \ + } while (0) + +// Will be put at the end of the SEQ +#define SMEND() \ + do { \ + if (box64_dynarec_strongmem) { \ + /* Check if there is any guest memory write. */ \ + int i = ninst; \ + while (i >= 0 && !dyn->insts[i].will_write) \ + --i; \ + if (i >= 0) { \ + /* It's a SEQ, put a barrier here. */ \ + DBAR(0); \ + } \ + } \ + dyn->smwrite = 0; \ + } while (0) + +// The barrier. 
+#define SMDMB() DBAR(0) +#endif // LOCK_* define #define LOCK_LOCK (int*)1 @@ -750,7 +877,11 @@ #define TABLE64(A, V) #endif -#define ARCH_INIT() +#define ARCH_INIT() \ + do { \ + dyn->smread = dyn->smwrite = 0; \ + } while (0) + #define ARCH_RESET() #if STEP < 2 diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 1dab0696..2e64ac55 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -91,6 +91,7 @@ typedef struct instruction_la64_s { uint8_t barrier_maybe; uint8_t will_write; uint8_t last_write; + uint8_t lock; uint8_t df_notneeded; flagcache_t f_exit; // flags status at end of instruction lsxcache_t lsx; // lsxcache at end of instruction (but before poping) diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 074a000f..7903ca06 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -32,33 +32,177 @@ #define PK64(a) *(uint64_t*)(addr + a) #define PKip(a) *(uint8_t*)(ip + a) +/* Box64 Strong Memory Model Emulation + * + * Definition of a SEQ: + * A SEQ is a sequence of opcodes that writes to guest memory, terminated by JMP, RET, CALL, etc. + * + * Memory barriers are added in the following cases to emulate the strong memory model: + * 1. End of a SEQ: + * - Scalar operations (a1) + * - SIMD operations (a2) + * 2. Start of a SEQ: + * - Scalar operations (b1) + * - SIMD operations (b2) + * 3. Right before the last guest memory store in a SEQ: + * - Scalar operations (c1) + * - SIMD operations (c2) + * 4. After every third guest memory store in a SEQ (d) + * + * STRONGMEM levels: + * LEVEL1: Includes a1, b1 + * LEVEL2: Includes LEVEL1, plus a2, b2, c1, c2 + * LEVEL3: Includes LEVEL2, plus d + */ + +#define STRONGMEM_SIMD_WRITE 2 // The level of SIMD memory writes will be tracked +#define STRONGMEM_LAST_WRITE 2 // The level of a barrier before the last guest memory store will be put +#define STRONGMEM_SEQ_WRITE 3 // The level of a barrier at every third memory store will be put + +#if STEP == 1 + +#define SMWRITE() \ + do { \ + /* Mark that current sequence writes to guest memory. */ \ + /* This will be used in SMEND for last_write. */ \ + dyn->smwrite = 1; \ + /* Mark that current opcode writes to guest memory. */ \ + dyn->insts[ninst].will_write = 1; \ + } while (0) + +#define SMWRITELOCK(lock) \ + do { \ + dyn->insts[ninst].lock = lock; \ + SMWRITE(); \ + } while (0) + +#define SMWRITE2() \ + do { \ + if (box64_dynarec_strongmem >= STRONGMEM_SIMD_WRITE) { \ + dyn->smwrite = 1; \ + dyn->insts[ninst].will_write = 2; \ + } \ + } while (0) + +#define SMREAD() +#define SMREADLOCK(lock) +#define WILLWRITE() +#define WILLWRITELOCK(lock) + +#define SMSTART() \ + do { \ + /* Clear current state at the start of a potential SEQ. 
*/ \ + dyn->smwrite = 0; \ + } while (0) -// Strong mem emulation helpers -#define SMREAD_MIN 2 -#define SMWRITE_MIN 1 -// Sequence of Read will trigger a DMB on "first" read if strongmem is >= SMREAD_MIN -// Sequence of Write will trigger a DMB on "last" write if strongmem is >= 1 -// All Write operation that might use a lock all have a memory barrier if strongmem is >= SMWRITE_MIN -// Opcode will read -#define SMREAD() if((dyn->smread==0) && (box64_dynarec_strongmem>SMREAD_MIN)) {SMDMB();} else dyn->smread=1 -// Opcode will read with option forced lock -#define SMREADLOCK(lock) if((lock) || ((dyn->smread==0) && (box64_dynarec_strongmem>SMREAD_MIN))) {SMDMB();} -// Opcode might read (depend on nextop) -#define SMMIGHTREAD() if(!MODREG) {SMREAD();} -// Opcode has wrote -#define SMWRITE() dyn->smwrite=1 -// Opcode has wrote (strongmem>1 only) -#define SMWRITE2() if(box64_dynarec_strongmem>SMREAD_MIN) dyn->smwrite=1 -// Opcode has wrote with option forced lock -#define SMWRITELOCK(lock) if(lock || (box64_dynarec_strongmem>SMWRITE_MIN)) {SMDMB();} else dyn->smwrite=1 -// Opcode might have wrote (depend on nextop) -#define SMMIGHTWRITE() if(!MODREG) {SMWRITE();} -// Start of sequence -#define SMSTART() SMEND() -// End of sequence -#define SMEND() if(dyn->smwrite && box64_dynarec_strongmem) {FENCE();} dyn->smwrite=0; dyn->smread=0; -// Force a Data memory barrier (for LOCK: prefix) -#define SMDMB() FENCE(); dyn->smwrite=0; dyn->smread=1 +#define SMEND() \ + do { \ + /* If there is any guest memory write, which is a SEQ, then compute the last_write. */ \ + if (dyn->smwrite && (box64_dynarec_strongmem >= STRONGMEM_LAST_WRITE)) { \ + int i = ninst; \ + while (i >= 0 && !dyn->insts[i].will_write) \ + --i; \ + if (i >= 0) { dyn->insts[i].last_write = 1; } \ + } \ + dyn->smwrite = 0; \ + } while (0) + +#define SMDMB() + +#else + +// An opcode writes guest memory, this need to be put after the STORE instruction manually. +#define SMWRITE() \ + do { \ + /* Put a barrier at every third memory write. */ \ + if (box64_dynarec_strongmem >= STRONGMEM_SEQ_WRITE) { \ + if (++dyn->smwrite >= 3 /* Every third memory write */) { \ + FENCE_RW_RW(); \ + dyn->smwrite = 1; \ + } \ + } else { \ + /* Mark that current sequence writes to guest memory. */ \ + dyn->smwrite = 1; \ + } \ + } while (0) + +// Similar to SMWRITE, but checks lock. +#define SMWRITELOCK(lock) \ + do { \ + if (lock) { \ + FENCE_RW_RW(); \ + } else { \ + SMWRITE(); \ + } \ + } while (0) + +// Similar to SMWRITE, but for SIMD instructions. +#define SMWRITE2() \ + do { \ + if (box64_dynarec_strongmem >= STRONGMEM_SIMD_WRITE) \ + SMWRITE(); \ + } while (0) + +// An opcode reads guest memory, this need to be put before the LOAD instruction manually. +#define SMREAD() + +// Similar to SMREAD, but checks lock. +#define SMREADLOCK(lock) \ + do { \ + if (lock) { \ + FENCE_RW_RW(); \ + } else { \ + SMREAD(); \ + } \ + } while (0) + +// An opcode will write memory, this will be put before the STORE instruction automatically. +#define WILLWRITE() \ + do { \ + if (box64_dynarec_strongmem >= dyn->insts[ninst].will_write && dyn->smwrite == 0) { \ + /* Will write but never written, this is the start of a SEQ, put a barrier. */ \ + FENCE_RW_RW(); \ + } else if (box64_dynarec_strongmem >= STRONGMEM_LAST_WRITE && dyn->insts[ninst].last_write) { \ + /* Last write, put a barrier */ \ + FENCE_RW_RW(); \ + } \ + } while (0) + +// Similar to WILLWRITE, but checks lock. 
+#define WILLWRITELOCK(lock) \ + do { \ + if (lock) { \ + FENCE_RW_RW(); \ + } else { \ + WILLWRITE(); \ + } \ + } while (0) + +// Used to clear the state at the start of a SEQ +#define SMSTART() \ + do { \ + dyn->smwrite = 0; \ + } while (0) + +// Will be put at the end of the SEQ +#define SMEND() \ + do { \ + if (box64_dynarec_strongmem) { \ + /* Check if there is any guest memory write. */ \ + int i = ninst; \ + while (i >= 0 && !dyn->insts[i].will_write) \ + --i; \ + if (i >= 0) { \ + /* It's a SEQ, put a barrier here. */ \ + FENCE_RW_RW(); \ + } \ + } \ + dyn->smwrite = 0; \ + } while (0) + +// The barrier. +#define SMDMB() FENCE_RW_RW() +#endif // LOCK_* define #define LOCK_LOCK (int*)1 @@ -1099,8 +1243,10 @@ #define FTABLE64(A, V) #endif -#define ARCH_INIT() \ +#define ARCH_INIT() \ + dyn->smread = dyn->smwrite = 0; \ dyn->vector_sew = VECTOR_SEWNA; + #define ARCH_RESET() \ dyn->vector_sew = VECTOR_SEWNA; diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 5c00231e..3de4b465 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -122,6 +122,9 @@ typedef struct instruction_rv64_s { uint16_t ymm0_out; // the ymm0 at th end of the opcode uint16_t ymm0_pass2, ymm0_pass3; int barrier_maybe; + uint8_t will_write; + uint8_t last_write; + uint8_t lock; uint8_t df_notneeded; flagcache_t f_exit; // flags status at end of instruction extcache_t e; // extcache at end of instruction (but before poping) diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 4e199e00..99fcf0e5 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -399,7 +399,7 @@ f28–31 ft8–11 FP temporaries Caller } while (0) #define FENCE_gen(pred, succ) (((pred) << 24) | ((succ) << 20) | 0b0001111) -#define FENCE() EMIT(FENCE_gen(3, 3)) +#define FENCE_RW_RW() EMIT(FENCE_gen(3, 3)) #define FENCE_I_gen() ((0b001 << 12) | 0b0001111) #define FENCE_I() EMIT(FENCE_I_gen()) |
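The last hunk renames the RV64 FENCE() macro to FENCE_RW_RW(), making explicit that FENCE_gen(3, 3) only orders memory reads and writes: pred and succ are both 0b0011, i.e. the R and W bits of the RISC-V fence operand. A small standalone check of that encoding, assuming the canonical `fence rw, rw` encoding 0x0330000F (not box64 code):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same encoding as rv64_emitter.h: fm=0, pred<<24, succ<<20, MISC-MEM opcode. */
#define FENCE_gen(pred, succ) (((pred) << 24) | ((succ) << 20) | 0b0001111)

int main(void)
{
    uint32_t insn = FENCE_gen(3, 3); /* pred/succ bit order is I O R W, so 0b0011 == "rw" */
    assert(insn == 0x0330000Fu);     /* encoding of `fence rw, rw` */
    printf("FENCE_RW_RW encodes as 0x%08X\n", (unsigned)insn);
    return 0;
}
```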