diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-11-26 20:05:29 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-11-26 20:05:29 +0100 |
| commit | a186b22d6e1b0ae92a45dba84d3f67e0248aff89 (patch) | |
| tree | 2f7322f9a8b4f49836c9808a8d8424efd7718bcb /src | |
| parent | a43186d137bb06f0bbdc4da4560dcee5023974eb (diff) | |
| download | box64-a186b22d6e1b0ae92a45dba84d3f67e0248aff89.tar.gz box64-a186b22d6e1b0ae92a45dba84d3f67e0248aff89.zip | |
[DYNAREC] Refactored Strong Memory Model emulation
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_00.c | 48 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_0f.c | 43 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 16 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_66.c | 20 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 38 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_6664.c | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66f0.c | 24 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_67.c | 9 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f0.c | 68 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_f20f.c | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_f30f.c | 11 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_helper.c | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_helper.h | 58 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_pass3.h | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_private.h | 6 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_native_pass.c | 1 | ||||
| -rwxr-xr-x | src/main.c | 2 |
17 files changed, 256 insertions, 107 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 3b3af4bf..360f4b9a 100755 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -530,6 +530,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { // mem <= reg + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); LDRSW_U12(gd, ed, fixedaddress); } @@ -537,6 +538,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { // mem <= reg + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); LDRw_U12(gd, ed, fixedaddress); } @@ -860,7 +862,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(gb1, x1, gb2*8, 8); BFIx(eb1, x4, eb2*8, 8); } else { - DMB_ISH(); + SMDMB(); GETGB(x4); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; @@ -869,7 +871,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // do the swap 14 -> strb(ed), 1 -> gd STLXRB(x3, x4, ed); CBNZx_MARKLOCK(x3); - DMB_ISH(); + SMDMB(); BFIx(gb1, x1, gb2*8, 8); } break; @@ -885,7 +887,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - DMB_ISH(); + SMDMB(); TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 B_MARK(cNE); MARKLOCK; @@ -897,7 +899,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin LDRxw_U12(x1, ed, 0); STRxw_U12(gd, ed, 0); MARK2; - DMB_ISH(); + SMDMB(); MOVxw_REG(gd, x1); } break; @@ -931,10 +933,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, &lock, 0, 0); STRB_U12(gd, ed, fixedaddress); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMWRITELOCK(lock); } break; case 0x89: @@ -946,10 +945,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { // mem <= reg addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0); STRxw_U12(gd, ed, fixedaddress); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMWRITELOCK(lock); } break; case 0x8A: @@ -980,10 +976,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, &lock, 0, 0); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier)))) { - DMB_ISH(); - } + SMREADLOCK(lock); LDRB_U12(x4, wback, fixedaddress); ed = x4; } @@ -997,10 +990,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier)))) { - DMB_ISH(); - } + SMREADLOCK(lock); LDRxw_U12(gd, ed, fixedaddress); } break; @@ -1013,6 +1003,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); STRH_U12(x3, ed, fixedaddress); + SMWRITE2(); } break; case 0x8D: @@ -1037,6 +1028,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if((nextop&0xC0)==0xC0) { ed = xRAX+(nextop&7)+(rex.b<<3); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 0, rex, NULL, 0, 0); LDRH_U12(x1, ed, fixedaddress); ed = x1; @@ -1144,12 +1136,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u64 = F64; MOV64x(x1, u64); STRB_U12(xRAX, x1, 0); + SMWRITE(); break; case 0xA3: INST_NAME("MOV Od,EAX"); u64 = F64; MOV64x(x1, u64); STRxw_U12(xRAX, x1, 0); + SMWRITE(); break; case 0xA4: if(rep) { @@ -1523,7 +1517,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETED(1); u8 = (F8)&(rex.w?0x3f:0x1f); emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { WBACK; } break; case 1: INST_NAME("ROR Ed, Ib"); @@ -1531,7 +1524,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETED(1); u8 = (F8)&(rex.w?0x3f:0x1f); emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); - if(u8) { WBACK; } break; case 2: INST_NAME("RCL Ed, Ib"); @@ -1630,10 +1622,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = F8; MOV32w(x3, u8); STRB_U12(x3, ed, fixedaddress); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMWRITELOCK(lock); } break; case 0xC7: @@ -1648,10 +1637,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i64 = F32S; MOV64xw(x3, i64); STRxw_U12(x3, ed, fixedaddress); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMWRITELOCK(lock); } break; @@ -2513,7 +2499,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: // JMP Ed INST_NAME("JMP Ed"); - BARRIER(BARRIER_FULL); + BARRIER(BARRIER_FLOAT); GETEDx(0); jump_to_next(dyn, 0, ed, ninst); *need_epilog = 0; diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 427177be..3e958790 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -39,6 +39,7 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \ a = fpu_get_scratch(dyn); \ VLDR128_U12(a, ed, fixedaddress); \ @@ -52,6 +53,7 @@ if(MODREG) { \ a = mmx_get_reg(dyn, ninst, x1, x2, x3, (nextop&7)); \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \ a = fpu_get_scratch(dyn); \ VLDR64_U12(a, ed, fixedaddress); \ @@ -60,6 +62,7 @@ #define PUTEM(a) \ if(!MODREG) { \ VSTR64_U12(a, ed, fixedaddress); \ + SMWRITE2(); \ } uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) @@ -108,6 +111,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x05: INST_NAME("SYSCALL"); + SMEND(); GETIP(addr); STORE_XEMU_CALL(xRIP); CALL_S(x64Syscall, -1); @@ -172,6 +176,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v0 = sse_get_reg_empty(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) + SMWRITE2(); } break; case 0x11: @@ -186,6 +191,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VSTR128_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; case 0x12: @@ -198,6 +204,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { INST_NAME("MOVLPS Gx,Ex"); GETGX(v0, 1); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VLD1_64(v0, 0, ed); } @@ -212,11 +219,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VST1_64(v0, 0, ed); // better to use VST1 than VSTR_64, to avoid NEON->VFPU transfert I assume + SMWRITE2(); } break; case 0x14: INST_NAME("UNPCKLPS Gx, Ex"); nextop = F8; + SMREAD(); GETEX(q0, 0, 0); GETGX(v0, 1); VZIP1Q_32(v0, v0, q0); @@ -224,6 +233,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x15: INST_NAME("UNPCKHPS Gx, Ex"); nextop = F8; + SMREAD(); GETEX(q0, 0, 0); GETGX(v0, 1); VZIP2Q_32(v0, v0, q0); @@ -237,6 +247,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOVeD(v0, 1, v1, 0); } else { INST_NAME("MOVHPS Gx,Ex"); + SMREAD(); GETGX(v0, 1); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VLD1_64(v0, 1, ed); @@ -252,6 +263,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VST1_64(v0, 1, ed); + SMWRITE2(); } break; case 0x18: @@ -303,6 +315,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOVQ(v0, v1); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); } @@ -319,6 +332,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VSTR128_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; @@ -348,6 +362,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); } else { s0 = fpu_get_scratch(dyn); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); VLDR32_U12(s0, ed, fixedaddress); } @@ -407,6 +422,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { REVxw(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); LDRxw_U12(gd, ed, fixedaddress); REVxw(gd, gd); @@ -419,6 +435,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg REVxw(xRAX+(nextop&7)+(rex.b<<3), gd); } else { // mem <= reg + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); REVxw(x1, gd); STRxw_U12(x1, ed, fixedaddress); @@ -491,6 +508,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(gd, x1, 3, 1); } else { // EX is memory + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, (0xfff<<3)-8, 7, rex, NULL, 0, 0); LDRx_U12(x1, ed, fixedaddress+0); LSRx(x1, x1, 31); @@ -1034,6 +1052,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { VSTR32_U12(v0, ed, fixedaddress); } + SMWRITE2(); } break; case 0x7F: @@ -1046,6 +1065,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VSTR64_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; @@ -1096,6 +1116,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { \ addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0); \ STRB_U12(x3, ed, fixedaddress); \ + SMWRITE(); \ } GOCOND(0x90, "SET", "Eb"); @@ -1115,6 +1136,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; @@ -1162,6 +1184,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; @@ -1187,6 +1210,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CSELxw(ed, ed, x4, cNE); if(wback) { STRxw_U12(ed, wback, fixedaddress); + SMWRITE(); } break; case 0xAC: @@ -1216,12 +1240,15 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; if((nextop&0xF8)==0xE8) { INST_NAME("LFENCE"); + SMDMB(); } else if((nextop&0xF8)==0xF0) { INST_NAME("MFENCE"); + SMDMB(); } else if((nextop&0xF8)==0xF8) { INST_NAME("SFENCE"); + SMDMB(); } else { switch((nextop>>3)&7) { case 0: @@ -1320,6 +1347,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; @@ -1345,6 +1373,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CSELxw(ed, ed, x4, cEQ); if(wback) { STRxw_U12(ed, wback, fixedaddress); + SMWRITE(); } break; @@ -1363,6 +1392,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } UBFXxw(gd, eb1, eb2*8, 8); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0); LDRB_U12(gd, ed, fixedaddress); } @@ -1375,6 +1405,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); UBFXxw(gd, ed, 0, 16); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, NULL, 0, 0); LDRH_U12(gd, ed, fixedaddress); } @@ -1391,6 +1422,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1); LDRxw_U12(x1, wback, fixedaddress); ed = x1; @@ -1407,6 +1439,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1); LDRxw_U12(x1, wback, fixedaddress); ed = x1; @@ -1419,6 +1452,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EORxw_REG_LSL(ed, ed, x4, u8); if(wback) { STRxw_U12(ed, wback, fixedaddress); + SMWRITE(); } MARK3; break; @@ -1430,6 +1464,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1); LDRxw_U12(x1, wback, fixedaddress); ed = x1; @@ -1442,6 +1477,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EORxw_REG_LSL(ed, ed, x4, u8); if(wback) { STRxw_U12(ed, wback, fixedaddress); + SMWRITE(); } MARK3; break; @@ -1453,6 +1489,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1); LDRxw_U12(x1, wback, fixedaddress); ed = x1; @@ -1464,6 +1501,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EORxw_REG_LSL(ed, ed, x4, u8); if(wback) { STRxw_U12(ed, wback, fixedaddress); + SMWRITE(); } MARK3; break; @@ -1481,6 +1519,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5) ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4; @@ -1505,6 +1544,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EORxw_REG(ed, ed, x4); if(wback) { STRxw_U12(ed, wback, fixedaddress); + SMWRITE(); } break; case 0xBC: @@ -1553,6 +1593,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } SBFXxw(gd, wback, wb2, 8); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0); LDRSBxw_U12(gd, ed, fixedaddress); } @@ -1565,6 +1606,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = xRAX+(nextop&7)+(rex.b<<3); SXTHxw(gd, ed); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff<<1, 1, rex, NULL, 0, 0); LDRSHxw_U12(gd, ed, fixedaddress); } @@ -1663,6 +1705,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOVeS(d0, i, v1, (u8>>(i*2)&3)); } } else { + SMREAD(); for(int i=2; i<4; ++i) { ADDx_U12(x2, ed, (u8>>(i*2)&3)*4); VLD1_32(d0, i, x2); diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index ff735cff..6d4ac7d6 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -91,6 +91,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { grab_segdata(dyn, addr, ninst, x4, seg); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); ADDx_REG(x4, x4, ed); VLDR64_U12(v0, x4, fixedaddress); // upper part reseted @@ -107,6 +108,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { grab_segdata(dyn, addr, ninst, x4, seg); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); ADDx_REG(x4, x4, ed); VLDR32_U12(v0, x4, fixedaddress); @@ -132,6 +134,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); ADDx_REG(x4, x4, ed); VSTR128_U12(v0, x4, fixedaddress); + SMWRITE2(); } break; case 1: @@ -148,6 +151,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); ADDx_REG(x4, x4, ed); VSTR64_U12(v0, x4, fixedaddress); + SMWRITE2(); } break; case 2: @@ -163,6 +167,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); ADDx_REG(x4, x4, ed); VSTR32_U12(v0, x4, fixedaddress); + SMWRITE2(); } break; default: @@ -182,6 +187,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOVQ(v0, v1); } else { grab_segdata(dyn, addr, ninst, x4, seg); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); ADDx_REG(x4, x4, ed); VLDR128_U12(v0, ed, fixedaddress); @@ -242,6 +248,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } UBFXxw(gd, eb1, eb2*8, 8); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); LDRB_REG(gd, ed, x4); } @@ -301,6 +308,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { // mem <= reg grab_segdata(dyn, addr, ninst, x4, seg); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); LDRSW_REG(gd, ed, x4); } @@ -309,6 +317,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { // mem <= reg grab_segdata(dyn, addr, ninst, x4, seg); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); LDRw_REG(gd, ed, x4); } @@ -510,6 +519,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } } else { grab_segdata(dyn, addr, ninst, x4, seg); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, 0); LDRB_REG(x4, wback, x4); ed = x4; @@ -526,6 +536,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { // mem <= reg addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); STRxw_REG(gd, ed, x4); + SMWRITE2(); } break; @@ -537,6 +548,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { // mem <= reg + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); LDRxw_REG(gd, ed, x4); } @@ -563,6 +575,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = F8; MOV32w(x3, u8); STRB_REG(x3, ed, x4); + SMWRITE2(); } break; case 0xC7: @@ -578,6 +591,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i64 = F32S; MOV64xw(x3, i64); STRxw_REG(x3, ed, x4); + SMWRITE2(); } break; @@ -967,7 +981,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: // JMP Ed INST_NAME("JMP Ed"); - BARRIER(1); + BARRIER(BARRIER_FLOAT); GETEDOx(x6, 0); jump_to_next(dyn, 0, ed, ninst); *need_epilog = 0; diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index f46da3f9..493544ea 100755 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -444,7 +444,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - DMB_ISH(); + SMDMB(); TSTx_mask(ed, 1, 0, 0); // mask=1 B_MARK(cNE); MARKLOCK; @@ -456,7 +456,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin LDRH_U12(x1, ed, 0); STRH_U12(gd, ed, 0); MARK2; - DMB_ISH(); + SMDMB(); BFIx(gd, x1, 0, 16); } break; @@ -473,10 +473,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, &lock, 0, 0); STRH_U12(gd, ed, fixedaddress); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMWRITELOCK(lock); } break; case 0x8B: @@ -490,10 +487,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, &lock, 0, 0); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMREADLOCK(lock); LDRH_U12(x1, ed, fixedaddress); BFIx(gd, x1, 0, 16); } @@ -537,6 +531,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u64 = F64; MOV64x(x1, u64); STRH_U12(xRAX, x1, 0); + SMWRITE(); break; case 0xA5: @@ -714,10 +709,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u16 = F16; MOV32w(x1, u16); STRH_U12(x1, ed, fixedaddress); - if(lock || (box64_dynarec_strongmem && - (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) { - DMB_ISH(); - } + SMWRITELOCK(lock); } break; diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 41c583da..cad47b37 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -27,6 +27,7 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \ a = fpu_get_scratch(dyn); \ VLDR128_U12(a, ed, fixedaddress); \ @@ -81,6 +82,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = sse_get_reg_empty(dyn, ninst, x1, gd); VMOVQ(v0, v1); } else { + SMREAD(); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); @@ -97,6 +99,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VSTR128_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; case 0x12: @@ -108,6 +111,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n DEFAULT; return addr; } + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VLD1_64(v0, 0, ed); break; @@ -122,6 +126,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VST1_64(v0, 0, ed); + SMWRITE2(); break; case 0x14: INST_NAME("UNPCKLPD Gx, Ex"); @@ -131,6 +136,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); VMOVeD(v0, 1, v1, 0); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VLD1_64(v0, 1, ed); } @@ -144,6 +150,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); VMOVeD(v0, 1, v1, 1); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); v1 = fpu_get_scratch(dyn); ADDx_U12(ed, ed, 8); @@ -159,6 +166,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n DEFAULT; return addr; } + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VLD1_64(v0, 1, ed); break; @@ -173,6 +181,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VST1_64(v0, 1, ed); + SMWRITE2(); break; case 0x1F: @@ -191,6 +200,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = sse_get_reg_empty(dyn, ninst, x1, gd); VMOVQ(v0, v1); } else { + SMREAD(); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); @@ -208,6 +218,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VSTR128_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; @@ -772,6 +783,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { \ ed = xRAX+(nextop&7)+(rex.b<<3); \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, NULL, 0, 0); \ LDRH_U12(x1, ed, fixedaddress); \ ed = x1; \ @@ -1113,6 +1125,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VMOVQ(v0, v1); } else { GETGX_empty(v0); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); } @@ -1187,6 +1200,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } else { GETGX_empty(v0); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 1); u8 = F8; if (u8) { @@ -1218,6 +1232,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1231,6 +1246,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } break; case 6: @@ -1245,6 +1261,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1268,6 +1285,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1281,6 +1299,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } break; case 6: @@ -1295,6 +1314,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1317,6 +1337,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1334,6 +1355,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1349,6 +1371,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1366,6 +1389,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } if(!MODREG) { VSTR128_U12(q0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1431,6 +1455,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VSTR64_U12(v0, ed, fixedaddress); + SMWRITE2(); } } else { if(MODREG) { @@ -1439,6 +1464,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); VSTR32_U12(v0, ed, fixedaddress); + SMWRITE2(); } } break; @@ -1452,6 +1478,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VSTR128_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; @@ -1464,6 +1491,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0); SBFXw(x1, gd, 4, 12); // r1 = (gw>>4) ADDx_REG_LSL(x3, wback, x1, 1); //(&ed)+=r1*2; @@ -1566,6 +1594,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0); SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; @@ -1583,6 +1612,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n EORx_REG(ed, ed, x1); if(wback) { STRH_U12(ed, wback, fixedaddress); + SMWRITE(); } break; @@ -1600,6 +1630,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } UBFXxw(x1, eb1, eb2*8, 8); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0); LDRB_U12(x1, ed, fixedaddress); } @@ -1613,6 +1644,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n eb1 = xRAX+(nextop&7)+(rex.b<<3); UBFXxw(x1, eb1, 0, 16); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff>>1, 1, rex, NULL, 0, 0); LDRH_U12(x1, ed, fixedaddress); } @@ -1631,6 +1663,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0); SBFXw(x4, gd, 4, 12); // r1 = (gw>>4) ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2; @@ -1647,6 +1680,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n EORx_REG(ed, ed, x1); if(wback) { STRH_U12(ed, wback, fixedaddress); + SMWRITE(); } break; case 0xBC: @@ -1699,6 +1733,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } SBFXw(x1, eb1, eb2, 8); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0); LDRSBw_U12(x1, ed, fixedaddress); } @@ -1746,6 +1781,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n ed = xRAX+(nextop&7)+(rex.b<<3); VMOVQHfrom(v0, u8, ed); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1); u8 = (F8)&7; VLD1_16(v0, u8, wback); @@ -1760,6 +1796,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n u8 = (F8)&7; VMOVHto(gd, v0, u8); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1); u8 = (F8)&7; LDRH_U12(gd, wback, u8*2); @@ -1858,6 +1895,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VSTR64_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; case 0xD7: diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c index 56697b22..1ec70948 100644 --- a/src/dynarec/arm64/dynarec_arm64_6664.c +++ b/src/dynarec/arm64/dynarec_arm64_6664.c @@ -65,6 +65,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); } else { grab_segdata(dyn, addr, ninst, x4, _FS); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); v1 = fpu_get_scratch(dyn); \ VLDR128_REG(v1, ed, x4); @@ -84,6 +85,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n grab_segdata(dyn, addr, ninst, x4, _FS); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VSTR64_REG(v0, ed, x4); + SMWRITE(); } break; @@ -113,6 +115,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { STRH_REG(gd, ed, x4); } + SMWRITE(); } break; @@ -131,6 +134,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } else { // mem <= reg grab_segdata(dyn, addr, ninst, x4, _FS); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); if(rex.w) { LDRx_REG(gd, ed, x4); diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index 01a739b6..bfdf24ee 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -57,7 +57,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGW(x5); - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); UXTHw(x6, ed); @@ -71,7 +71,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; case 0x0F: @@ -83,7 +83,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); UXTHw(x6, xRAX); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); @@ -122,7 +122,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n // Common part (and fallback for EAX != Ed) UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);} BFIx(xRAX, x1, 0, 16); - DMB_ISH(); + SMDMB(); break; case 0xC1: @@ -131,7 +131,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); UXTHx(x5, gd); - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); BFIx(gd, ed, 0, 16); @@ -150,7 +150,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } BFIx(gd, x1, 0, 16); } - DMB_ISH(); + SMDMB(); break; default: @@ -161,7 +161,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x81: case 0x83: nextop = F8; - DMB_ISH(); + SMDMB(); switch((nextop>>3)&7) { case 0: //ADD if(opcode==0x81) { @@ -355,7 +355,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; } - DMB_ISH(); + SMDMB(); break; case 0xFF: @@ -365,7 +365,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0: // INC Ew INST_NAME("LOCK INC Ew"); SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); UXTHw(x6, ed); @@ -379,12 +379,12 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; case 1: //DEC Ew INST_NAME("LOCK DEC Ew"); SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); UXTHw(x6, ed); @@ -398,7 +398,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; default: DEFAULT; diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index aeae1dd3..1b35d645 100755 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -186,6 +186,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); } else { s0 = fpu_get_scratch(dyn); + SMREAD(); addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); VLDR32_U12(s0, ed, fixedaddress); } @@ -204,6 +205,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOV(v0, v1); } else { v0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd); + SMREAD(); addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v0, ed, fixedaddress); } @@ -219,6 +221,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VSTR64_U12(v0, ed, fixedaddress); + SMWRITE(); } break; @@ -633,7 +636,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { // mem <= reg addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0); STRxw_U12(gd, ed, fixedaddress); - if(lock) {DMB_ISH();} + SMWRITELOCK(lock); } break; @@ -645,7 +648,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); } else { addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0); - if(lock) {DMB_ISH();} + SMREADLOCK(lock); LDRxw_U12(gd, ed, fixedaddress); } break; @@ -750,7 +753,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i64 = F32S; MOV64xw(x3, i64); STRxw_U12(x3, ed, fixedaddress); - if(lock) {DMB_ISH();} + SMWRITELOCK(lock); } break; diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index d13c91b2..b333d079 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -58,7 +58,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("LOCK ADD Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; - DMB_ISH(); + SMDMB(); GETGB(x2); if((nextop&0xC0)==0xC0) { if(rex.rex) { @@ -80,14 +80,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRB(x4, x1, wback); CBNZx_MARKLOCK(x4); } - DMB_ISH(); + SMDMB(); break; case 0x01: INST_NAME("LOCK ADD Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); if((nextop&0xC0)==0xC0) { ed = xRAX+(nextop&7)+(rex.b<<3); emit_add32(dyn, ninst, rex, ed, gd, x3, x4); @@ -99,14 +99,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; case 0x08: INST_NAME("LOCK OR Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; - DMB_ISH(); + SMDMB(); GETGB(x2); if((nextop&0xC0)==0xC0) { if(rex.rex) { @@ -128,14 +128,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRB(x4, x1, wback); CBNZx_MARKLOCK(x4); } - DMB_ISH(); + SMDMB(); break; case 0x09: INST_NAME("LOCK OR Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); emit_or32(dyn, ninst, rex, ed, gd, x3, x4); @@ -147,7 +147,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; case 0x0F: @@ -160,7 +160,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETGB(x1); UBFXx(x6, xRAX, 0, 8); - DMB_ISH(); + SMDMB(); if(MODREG) { if(rex.rex) { wback = xRAX+(nextop&7)+(rex.b<<3); @@ -196,14 +196,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x2, x3, x4, x5);} BFIx(xRAX, x2, 0, 8); // upper par of RAX will be erase on 32bits, no mater what } - DMB_ISH(); + SMDMB(); break; case 0xB1: INST_NAME("LOCK CMPXCHG Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; @@ -244,7 +244,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what } - DMB_ISH(); + SMDMB(); break; case 0xC1: @@ -252,7 +252,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); MOVxw_REG(x1, ed); @@ -284,7 +284,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } MOVxw_REG(gd, x1); } - DMB_ISH(); + SMDMB(); break; case 0xC7: @@ -292,7 +292,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ZF, SF_SUBSET); nextop = F8; addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - DMB_ISH(); + SMDMB(); MARKLOCK; LDAXPxw(x2, x3, wback); CMPSxw_REG(xRAX, x2); @@ -308,7 +308,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVxw_REG(xRDX, x3); MOV32w(x1, 0); MARK3; - DMB_ISH(); + SMDMB(); BFIw(xFlags, x1, F_ZF, 1); break; @@ -322,7 +322,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); emit_and32(dyn, ninst, rex, ed, gd, x3, x4); @@ -334,7 +334,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; case 0x29: @@ -342,7 +342,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; GETGD; - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); emit_sub32(dyn, ninst, rex, ed, gd, x3, x4); @@ -354,7 +354,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); } - DMB_ISH(); + SMDMB(); break; case 0x66: @@ -362,7 +362,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x80: nextop = F8; - DMB_ISH(); + SMDMB(); switch((nextop>>3)&7) { case 0: //ADD INST_NAME("ADD Eb, Ib"); @@ -521,12 +521,12 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin default: DEFAULT; } - DMB_ISH(); + SMDMB(); break; case 0x81: case 0x83: nextop = F8; - DMB_ISH(); + SMDMB(); switch((nextop>>3)&7) { case 0: //ADD if(opcode==0x81) { @@ -550,7 +550,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5); STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); - DMB_ISH(); + SMDMB(); B_NEXT_nocond; MARK; // unaligned! also, not enough LDRxw_U12(x1, wback, 0); @@ -658,7 +658,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5); STLXRxw(x3, x1, wback); CBNZx_MARKLOCK(x3); - DMB_ISH(); + SMDMB(); B_NEXT_nocond; MARK; // unaligned! also, not enough LDRxw_U12(x1, wback, 0); @@ -703,7 +703,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } break; } - DMB_ISH(); + SMDMB(); break; case 0x86: @@ -726,7 +726,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(gb1, x1, gb2*8, 8); BFIx(eb1, x4, eb2*8, 8); } else { - DMB_ISH(); + SMDMB(); GETGB(x4); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; @@ -735,7 +735,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // do the swap 14 -> strb(ed), 1 -> gd STLXRB(x3, x4, ed); CBNZx_MARKLOCK(x3); - DMB_ISH(); + SMDMB(); BFIx(gb1, x1, gb2*8, 8); } break; @@ -750,7 +750,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVxw_REG(ed, x1); } else { GETGD; - DMB_ISH(); + SMDMB(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 B_MARK(cNE); @@ -763,7 +763,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin LDRxw_U12(x1, ed, 0); STRxw_U12(gd, ed, 0); MARK2; - DMB_ISH(); + SMDMB(); MOVxw_REG(gd, x1); } break; @@ -775,7 +775,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0: // INC Ed INST_NAME("LOCK INC Ed"); SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); emit_inc32(dyn, ninst, rex, ed, x3, x4); @@ -798,12 +798,12 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZw_MARK(x3); STRxw_U12(x1, wback, 0); } - DMB_ISH(); + SMDMB(); break; case 1: //DEC Ed INST_NAME("LOCK DEC Ed"); SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); - DMB_ISH(); + SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); emit_dec32(dyn, ninst, rex, ed, x3, x4); @@ -826,7 +826,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZw_MARK(x3); STRxw_U12(x1, wback, 0); } - DMB_ISH(); + SMDMB(); break; default: DEFAULT; diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index 7cb87af3..ff69ad54 100755 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -27,6 +27,7 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ } else { \ + SMREAD(); \ a = fpu_get_scratch(dyn); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \ VLDR64_U12(a, ed, fixedaddress); \ @@ -81,6 +82,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n d0 = sse_get_reg(dyn, ninst, x1, ed, 0); VMOVeD(v0, 0, d0, 0); } else { + SMREAD(); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v0, ed, fixedaddress); // upper part reseted @@ -98,6 +100,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VSTR64_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; case 0x12: @@ -109,6 +112,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = sse_get_reg_empty(dyn, ninst, x1, gd); VMOVeD(v0, 0, d0, 0); } else { + SMREAD(); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v0, ed, fixedaddress); @@ -413,6 +417,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VMOVQ(v0, v1); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 7, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); } diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c index 9619f260..d407bb72 100755 --- a/src/dynarec/arm64/dynarec_arm64_f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -27,6 +27,7 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ } else { \ + SMREAD(); \ a = fpu_get_scratch(dyn); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \ VLDR32_U12(a, ed, fixedaddress); \ @@ -37,6 +38,7 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \ a = fpu_get_scratch(dyn); \ VLDR128_U12(a, ed, fixedaddress); \ @@ -85,6 +87,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VMOVeS(v0, 0, q0, 0); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); VLDR32_U12(v0, ed, fixedaddress); } @@ -100,6 +103,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); VSTR32_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; case 0x12: @@ -108,6 +112,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); q1 = fpu_get_scratch(dyn); VLDR128_U12(q1, ed, fixedaddress); @@ -122,6 +127,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); q1 = fpu_get_scratch(dyn); VLDR128_U12(q1, ed, fixedaddress); @@ -313,6 +319,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VMOVQ(v0, v1); } else { GETGX_empty(v0); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); } @@ -348,6 +355,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n FMOVD(v0, v1); } else { GETGX_empty(v0); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v0, ed, fixedaddress); } @@ -362,6 +370,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VSTR128_U12(v0, ed, fixedaddress); + SMWRITE2(); } break; @@ -424,6 +433,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VEORQ(v0, v0, v0); // usefull? VMOV(v0, v1); } else { + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v0, ed, fixedaddress); } @@ -437,6 +447,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); } else { v1 = fpu_get_scratch(dyn); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v1, ed, fixedaddress); } diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index d38c8078..d3d37a0c 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -389,6 +389,7 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) GETIP_(ip); } TABLE64(x2, (uintptr_t)arm64_epilog); + SMEND(); BR(x2); } @@ -426,6 +427,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) #ifdef HAVE_TRACE //MOVx(x3, 15); no access to PC reg #endif + SMEND(); BLR(x2); // save LR... } @@ -445,6 +447,7 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst) UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); LDRx_REG_LSL3(x2, x2, x3); MOVx_REG(x1, xRIP); + SMEND(); BLR(x2); // save LR CLEARIP(); } @@ -471,6 +474,7 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); LDRx_REG_LSL3(x2, x2, x3); MOVx_REG(x1, xRIP); + SMEND(); BLR(x2); // save LR CLEARIP(); } @@ -503,6 +507,7 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) MOVx_REG(xRSP, x3); // Ret.... MOV64x(x2, (uintptr_t)arm64_epilog); // epilog on purpose, CS might have changed! + SMEND(); BR(x2); CLEARIP(); } diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 008c14e5..dcffd85d 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -32,6 +32,30 @@ #define PK64(a) *(uint64_t*)(addr+a) #define PKip(a) *(uint8_t*)(ip+a) +// Strong mem emulation helpers +// Sequence of Read will trigger a DMB on "first" read if strongmem is 2 +// Squence of Write will trigger a DMB on "last" write if strongmem is 1 +// Opcode will read +#define SMREAD() if(!dyn->smread && box64_dynarec_strongmem>1) {DMB_ISH(); dyn->smread=1;} +// Opcode will read with option forced lock +#define SMREADLOCK(lock) if(lock) {SMDMB();} else if(!dyn->smread && box64_dynarec_strongmem>1) {DMB_ISH(); dyn->smread=1;} +// Opcode migh read (depend on nextop) +#define SMMIGHTREAD() if(!MODREG) {SMREAD();} +// Opcode has wrote +#define SMWRITE() dyn->smwrite=1 +// Opcode has wrote (strongmem>1 only) +#define SMWRITE2() if(box64_dynarec_strongmem>1) dyn->smwrite=1 +// Opcode has wrote with option forced lock +#define SMWRITELOCK(lock) if(lock) {SMDMB();} else dyn->smwrite=1 +// Opcode migh have wrote (depend on nextop) +#define SMMIGHTWRITE() if(!MODREG) {SMWRITE();} +// Start of sequence +#define SMSTART() SMEND() +// End of sequence +#define SMEND() if(dyn->smwrite && box64_dynarec_strongmem) {DMB_ISH();} dyn->smwrite=0; dyn->smread=0; +// Force a Data memory barrier (for LOCK: prefix) +#define SMDMB() DMB_ISH(); if(dyn->smwrite) dyn->smwrite=0; dyn->smread=1 + //LOCK_* define #define LOCK_LOCK (int*)1 @@ -42,6 +66,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD() \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \ LDRxw_U12(x1, wback, fixedaddress); \ ed = x1; \ @@ -50,6 +75,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \ LDRx_U12(x1, wback, fixedaddress); \ ed = x1; \ @@ -58,6 +84,7 @@ ed = xEAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL,0, D); \ LDRw_U12(x1, wback, fixedaddress); \ ed = x1; \ @@ -68,6 +95,7 @@ wb = x1; \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \ LDRSW_U12(x1, wback, fixedaddress); \ wb = ed = x1; \ @@ -76,6 +104,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \ LDRxw_U12(x1, wback, fixedaddress); \ ed = x1; \ @@ -86,6 +115,7 @@ wb = x1; \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \ LDRSW_U12(x1, wback, fixedaddress); \ wb = ed = x1; \ @@ -95,6 +125,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \ LDRxw_U12(hint, wback, fixedaddress); \ ed = hint; \ @@ -103,6 +134,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted32(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \ LDRxw_U12(hint, wback, fixedaddress); \ ed = hint; \ @@ -113,6 +145,7 @@ MOVxw_REG(ret, ed); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \ ed = ret; \ LDRxw_U12(ed, wback, fixedaddress); \ @@ -122,31 +155,34 @@ MOVxw_REG(ret, ed); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted32(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \ ed = ret; \ LDRxw_U12(ed, wback, fixedaddress); \ } // Write back ed in wback (if wback not 0) -#define WBACK if(wback) {STRxw_U12(ed, wback, fixedaddress);} +#define WBACK if(wback) {STRxw_U12(ed, wback, fixedaddress); SMWRITE();} // Write back ed in wback (if wback not 0) -#define WBACKx if(wback) {STRx_U12(ed, wback, fixedaddress);} +#define WBACKx if(wback) {STRx_U12(ed, wback, fixedaddress); SMWRITE();} // Write back ed in wback (if wback not 0) -#define WBACKw if(wback) {STRw_U12(ed, wback, fixedaddress);} +#define WBACKw if(wback) {STRw_U12(ed, wback, fixedaddress); SMWRITE();} //GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI #define GETEDO(O, D) if(MODREG) { \ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, D); \ LDRxw_REG(x1, wback, O); \ ed = x1; \ } -#define WBACKO(O) if(wback) {STRxw_REG(ed, wback, O);} +#define WBACKO(O) if(wback) {STRxw_REG(ed, wback, O); SMWRITE2();} //GETEDOx can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI #define GETEDOx(O, D) if(MODREG) { \ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, D); \ LDRx_REG(x1, wback, O); \ ed = x1; \ @@ -157,6 +193,7 @@ wb = x1; \ wback = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, D); \ LDRSW_REG(x1, wback, O); \ wb = ed = x1; \ @@ -176,6 +213,7 @@ ed = i; \ wb1 = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, w, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \ LDRH_U12(i, wback, fixedaddress); \ ed = i; \ @@ -188,6 +226,7 @@ ed = i; \ wb1 = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \ LDRH_U12(i, wback, fixedaddress); \ ed = i; \ @@ -200,15 +239,16 @@ ed = i; \ wb1 = 0; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \ LDRSHx_U12(i, wback, fixedaddress);\ ed = i; \ wb1 = 1; \ } // Write ed back to original register / memory -#define EWBACK if(wb1) {STRH_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, 0, 16);} +#define EWBACK if(wb1) {STRH_U12(ed, wback, fixedaddress); SMWRITE();} else {BFIx(wback, ed, 0, 16);} // Write w back to original register / memory -#define EWBACKW(w) if(wb1) {STRH_U12(w, wback, fixedaddress);} else {BFIx(wback, w, 0, 16);} +#define EWBACKW(w) if(wb1) {STRH_U12(w, wback, fixedaddress); SMWRITE();} else {BFIx(wback, w, 0, 16);} // Write back gd in correct register #define GWBACK BFIx((xRAX+((nextop&0x38)>>3)+(rex.r<<3)), gd, 0, 16); //GETEB will use i for ed, and can use r3 for wback. @@ -225,6 +265,7 @@ wb1 = 0; \ ed = i; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \ LDRB_U12(i, wback, fixedaddress); \ wb1 = 1; \ @@ -244,6 +285,7 @@ wb1 = 0; \ ed = i; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, D); \ ADDx_REG(x3, wback, i); \ if(wback!=x3) wback = x3; \ @@ -265,6 +307,7 @@ wb1 = 0; \ ed = i; \ } else { \ + SMREAD(); \ addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \ LDRSBx_U12(i, wback, fixedaddress); \ wb1 = 1; \ @@ -284,13 +327,14 @@ wb1 = 0; \ ed = i; \ } else { \ + SMREAD(); \ addr = geted32(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \ LDRB_U12(i, wback, fixedaddress); \ wb1 = 1; \ ed = i; \ } // Write eb (ed) back to original register / memory -#define EBBACK if(wb1) {STRB_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, wb2, 8);} +#define EBBACK if(wb1) {STRB_U12(ed, wback, fixedaddress); SMWRITE();} else {BFIx(wback, ed, wb2, 8);} //GETGB will use i for gd #define GETGB(i) if(rex.rex) { \ gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); \ diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h index 2a003711..13b2b323 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass3.h +++ b/src/dynarec/arm64/dynarec_arm64_pass3.h @@ -15,7 +15,7 @@ #define INST_NAME(name) \ if(box64_dynarec_dump) {\ printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \ - dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X", \ + dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", \ (box64_dynarec_dump>1)?"\e[32m":"", \ (void*)(dyn->native_start+dyn->insts[ninst].address), \ dyn->insts[ninst].size/4, \ @@ -29,7 +29,8 @@ dyn->insts[ninst].x64.gen_flags, \ dyn->insts[ninst].x64.use_flags, \ dyn->insts[ninst].x64.need_before, \ - dyn->insts[ninst].x64.need_after); \ + dyn->insts[ninst].x64.need_after, \ + dyn->smread, dyn->smwrite); \ if(dyn->insts[ninst].pred_sz) { \ dynarec_log(LOG_NONE, ", pred="); \ for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)\ diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 37d43372..6abdf829 100755 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -82,7 +82,7 @@ typedef struct instruction_arm64_s { } instruction_arm64_t; typedef struct dynarec_arm_s { - instruction_arm64_t *insts; + instruction_arm64_t*insts; int32_t size; int32_t cap; uintptr_t start; // start of the block @@ -91,7 +91,7 @@ typedef struct dynarec_arm_s { uintptr_t native_start; // start of the arm code size_t native_size; // size of emitted arm code uintptr_t last_ip; // last set IP in RIP (or NULL if unclean state) TODO: move to a cache something - uint64_t *table64; // table of 64bits value + uint64_t* table64; // table of 64bits value int table64size;// size of table (will be appended at end of executable code) int table64cap; uintptr_t tablestart; @@ -103,6 +103,8 @@ typedef struct dynarec_arm_s { int* predecessor;// single array of all predecessor dynablock_t* dynablock; instsize_t* instsize; + uint8_t smread; // for strongmem model emulation + uint8_t smwrite; // for strongmem model emulation } dynarec_arm_t; void add_next(dynarec_arm_t *dyn, uintptr_t addr); diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index 7ab25a6f..12dbda7d 100755 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -124,6 +124,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr) dyn->n.stack_push = 0; dyn->n.swapped = 0; NEW_INST; + if(dyn->insts[ninst].pred_sz>1) {SMSTART();} fpu_reset_scratch(dyn); if((dyn->insts[ninst].x64.need_before&~X_PEND) && !dyn->insts[ninst].pred_sz) { READFLAGS(dyn->insts[ninst].x64.need_before&~X_PEND); diff --git a/src/main.c b/src/main.c index 8ec81411..e1221454 100755 --- a/src/main.c +++ b/src/main.c @@ -1267,7 +1267,7 @@ int main(int argc, const char **argv, char **env) { printf_log(LOG_INFO, "dota2 detected, forcing dummy crashhandler\n"); box64_dummy_crashhandler = 1; #ifdef DYNAREC - box64_dynarec_strongmem = 2; + box64_dynarec_strongmem = 1; #endif } // special case for steam-runtime-check-requirements to fake 64bits suport |