| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-12-19 01:13:52 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-18 18:13:52 +0100 |
| commit | 4662ab1c80fa800851793af44e32460ff5ebb11e | |
| tree | 03a606f3946c24f6871c90c5b7356572e069f08e /src | |
| parent | b0b34e3a2ad0290cda6c301f08913fca536f0b6c | |
[RV64_DYNAREC] Added unaligned support to CMPXCHG8B opcode (#2166)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 155 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 14 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 1 |
3 files changed, 101 insertions, 69 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index 369a3e6c..f30ea9f0 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -217,14 +217,14 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MARK3;
             // Unaligned
             ANDI(x5, wback, -(1 << (rex.w + 2)));
-            MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
+            MARKLOCK2;
             LDxw(x1, wback, 0);
             LRxw(x6, x5, 1, 1);
             SUBxw(x3, x1, xRAX);
             BNEZ_MARK(x3); // EAX == Ed
             SCxw(x4, x6, x5, 1, 1);
-            BNEZ_MARK2(x4);
+            BNEZ_MARKLOCK2(x4);
             SDxw(gd, wback, 0);
             MARK;
             UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
@@ -269,67 +269,92 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xC7:
             switch (rep) {
                 case 0:
-                    if (rex.w) {
-                        INST_NAME("LOCK CMPXCHG16B Gq, Eq");
-                        static int warned = 0;
-                        PASS3(if (!warned) dynarec_log(LOG_INFO, "Warning, LOCK CMPXCHG16B is not well supported on RISC-V and issues are expected.\n"));
-                        warned = 1;
-                    } else {
-                        INST_NAME("LOCK CMPXCHG8B Gq, Eq");
-                    }
-                    SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION);
-                    nextop = F8;
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                    ANDI(xFlags, xFlags, ~(1 << F_ZF));
-                    if (rex.w) {
-                        // there is no atomic move on 16bytes, so implement it with mutex
-                        LD(x7, xEmu, offsetof(x64emu_t, context));
-                        ADDI(x7, x7, offsetof(box64context_t, mutex_16b));
-                        ADDI(x4, xZR, 1);
-                        MARK2;
-                        AMOSWAP_W(x4, x4, x7, 1, 1);
-                        // x4 == 1 if locked
-                        BNEZ_MARK2(x4);
+                    switch (rex.w) {
+                        case 0:
+                            INST_NAME("LOCK CMPXCHG8B Gq, Eq");
+                            SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION);
+                            nextop = F8;
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                            ANDI(xFlags, xFlags, ~(1 << F_ZF));
+                            SMDMB();
+                            ZEXTW2(x3, xRAX);
+                            SLLI(x2, xRDX, 32);
+                            OR(x3, x3, x2); // x3 is edx:eax
+                            ZEXTW2(x4, xRBX);
+                            SLLI(x2, xRCX, 32);
+                            OR(x4, x4, x2); // x4 is ecx:ebx
+                            ANDI(x5, wback, (1 << (rex.w + 2)) - 1);
+                            BNEZ_MARK3(x1);
+                            // Aligned
+                            MARKLOCK;
+                            LR_D(x2, wback, 1, 1);
+                            BNE_MARK(x2, x3); // edx:eax != ed, load m64 into edx:eax
+                            SC_D(x5, x4, wback, 1, 1); // set ZF and load ecx:ebx into m64
+                            BNEZ_MARKLOCK(x5);
+                            ORI(xFlags, xFlags, 1 << F_ZF);
+                            SMDMB();
+                            B_NEXT_nocond;
+                            MARK;
+                            SLLI(xRDX, x2, 32);
+                            ZEXTW2(xRAX, x2);
+                            SMDMB();
+                            B_NEXT_nocond;
+                            MARK3;
+                            // Unaligned
+                            ANDI(x5, wback, -(1 << (rex.w + 2)));
+                            MARKLOCK2;
+                            LD(x2, wback, 0);
+                            LR_D(x6, x5, 1, 1);
+                            BNE_MARK2(x2, x3); // edx:eax != ed, load m64 into edx:eax
+                            SCxw(x7, x6, x5, 1, 1);
+                            BNEZ_MARKLOCK2(x7);
+                            SDxw(x4, wback, 0); // set ZF and load ecx:ebx into m64
+                            ORI(xFlags, xFlags, 1 << F_ZF);
+                            SMDMB();
+                            B_NEXT_nocond;
+                            MARK2;
+                            SLLI(xRDX, x2, 32);
+                            ZEXTW2(xRAX, x2);
+                            SMDMB();
+                            break;
+                        case 1:
+                            INST_NAME("LOCK CMPXCHG16B Gq, Eq");
+                            static int warned = 0;
+                            PASS3(if (!warned) dynarec_log(LOG_INFO, "Warning, LOCK CMPXCHG16B is not well supported on RISC-V and issues are expected.\n"));
+                            PASS3(warned = 1);
+                            SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION);
+                            nextop = F8;
+                            addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                            ANDI(xFlags, xFlags, ~(1 << F_ZF));
+                            // there is no atomic move on 16bytes, so implement it with mutex
+                            LD(x7, xEmu, offsetof(x64emu_t, context));
+                            ADDI(x7, x7, offsetof(box64context_t, mutex_16b));
+                            ADDI(x4, xZR, 1);
+                            MARK2;
+                            AMOSWAP_W(x4, x4, x7, 1, 1);
+                            // x4 == 1 if locked
+                            BNEZ_MARK2(x4);
-                        SMDMB();
-                        LD(x3, wback, 8);
-                        MARKLOCK;
-                        LR_D(x2, wback, 1, 1);
-                        BNE_MARK(x2, xRAX);
-                        BNE_MARK(x3, xRDX);
-                        SC_D(x5, xRBX, wback, 1, 1);
-                        BNEZ_MARKLOCK(x5);
-                        SD(xRCX, wback, 8);
-                        ORI(xFlags, xFlags, 1 << F_ZF);
-                        B_MARK3_nocond;
-                        MARK;
-                        MV(xRAX, x2);
-                        MV(xRDX, x3);
-                        MARK3;
-                        SMDMB();
+                            SMDMB();
+                            LD(x3, wback, 8);
+                            MARKLOCK;
+                            LR_D(x2, wback, 1, 1);
+                            BNE_MARK(x2, xRAX);
+                            BNE_MARK(x3, xRDX);
+                            SC_D(x5, xRBX, wback, 1, 1);
+                            BNEZ_MARKLOCK(x5);
+                            SD(xRCX, wback, 8);
+                            ORI(xFlags, xFlags, 1 << F_ZF);
+                            B_MARK3_nocond;
+                            MARK;
+                            MV(xRAX, x2);
+                            MV(xRDX, x3);
+                            MARK3;
+                            SMDMB();
-                        // unlock
-                        AMOSWAP_W(xZR, xZR, x7, 1, 1);
-                    } else {
-                        SMDMB();
-                        ZEXTW2(x3, xRAX);
-                        SLLI(x2, xRDX, 32);
-                        OR(x3, x3, x2);
-                        ZEXTW2(x4, xRBX);
-                        SLLI(x2, xRCX, 32);
-                        OR(x4, x4, x2);
-                        MARKLOCK;
-                        LR_D(x2, wback, 1, 1);
-                        BNE_MARK(x2, x3); // EDX_EAX != Ed
-                        SC_D(x5, x4, wback, 1, 1);
-                        BNEZ_MARKLOCK(x5);
-                        ORI(xFlags, xFlags, 1 << F_ZF);
-                        B_MARK3_nocond;
-                        MARK;
-                        SLLI(xRDX, x2, 32);
-                        ZEXTW2(xRAX, x2);
-                        MARK3;
-                        SMDMB();
+                            // unlock
+                            AMOSWAP_W(xZR, xZR, x7, 1, 1);
+                            break;
                     }
                     break;
                 default:
@@ -542,12 +567,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MARK3;
             // Unaligned
             ANDI(x5, wback, -(1 << (rex.w + 2)));
-            MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
+            MARKLOCK2;
             LDxw(x1, wback, 0);
             LRxw(x6, x5, 1, 1);
             ADDxw(x4, x1, x7);
             SCxw(x3, x6, x5, 1, 1);
-            BNEZ_MARK2(x3);
+            BNEZ_MARKLOCK2(x3);
             SDxw(x4, wback, 0);
             MARK;
             IFXORNAT (X_ALL | X_PEND)
@@ -637,12 +662,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MARK3;
             // Unaligned
             ANDI(x5, wback, -(1 << (rex.w + 2)));
-            MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
+            MARKLOCK2;
             LDxw(x1, wback, 0);
             LRxw(x6, x5, 1, 1);
             SUBxw(x4, x1, x7);
             SCxw(x3, x6, x5, 1, 1);
-            BNEZ_MARK2(x3);
+            BNEZ_MARKLOCK2(x3);
             SDxw(x4, wback, 0);
             MARK;
             IFXORNAT (X_ALL | X_PEND)
```
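The aligned path above is the standard RISC-V LR/SC retry loop: `LR_D` takes a reservation on the doubleword, `BNE_MARK` exits on a compare mismatch, `SC_D` attempts the conditional store, and `BNEZ_MARKLOCK` loops back as long as the reservation keeps getting lost. A rough C sketch of that emitted sequence (RISC-V only; `cas64_lrsc` and its parameters are illustrative names, not box64 APIs):

```c
#include <stdint.h>

// Illustrative sketch of the aligned compare-exchange loop the dynarec emits.
// Returns 1 if *addr matched `expected` and `desired` was stored (x86 ZF=1);
// returns 0 on mismatch, with *old receiving the current memory value.
static inline int cas64_lrsc(uint64_t *addr, uint64_t expected,
                             uint64_t desired, uint64_t *old)
{
    uint64_t cur;
    int ok;
    __asm__ __volatile__(
        "1: lr.d.aqrl %0, (%2)\n"     // load-reserved (the MARKLOCK label)
        "   bne       %0, %3, 2f\n"   // mismatch: give up (BNE_MARK)
        "   sc.d.aqrl %1, %4, (%2)\n" // store-conditional
        "   bnez      %1, 1b\n"       // reservation lost: retry (BNEZ_MARKLOCK)
        "   li        %1, 1\n"        // stored: ZF=1
        "   j         3f\n"
        "2: li        %1, 0\n"        // mismatch: ZF=0, cur holds memory value
        "3:\n"
        : "=&r"(cur), "=&r"(ok)
        : "r"(addr), "r"(expected), "r"(desired)
        : "memory");
    *old = cur;
    return ok;
}
```

The unaligned variant wraps the same loop around the aligned doubleword that contains the operand: the plain `LD`/`SDxw` access the unaligned address itself, while the `LR`/`SC` pair on the aligned container (`ANDI(x5, wback, -(1 << (rex.w + 2)))`) detects concurrent writers and forces a retry.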
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index d78761cc..b3eb5811 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -705,10 +705,12 @@
 #define MARKF2 MARKFi(1)
 #define GETMARKF2 GETMARKFi(1)
 
-#define MARKSEG     dyn->insts[ninst].markseg = dyn->native_size
-#define GETMARKSEG  dyn->insts[ninst].markseg
-#define MARKLOCK    dyn->insts[ninst].marklock = dyn->native_size
-#define GETMARKLOCK dyn->insts[ninst].marklock
+#define MARKSEG      dyn->insts[ninst].markseg = dyn->native_size
+#define GETMARKSEG   dyn->insts[ninst].markseg
+#define MARKLOCK     dyn->insts[ninst].marklock = dyn->native_size
+#define GETMARKLOCK  dyn->insts[ninst].marklock
+#define MARKLOCK2    dyn->insts[ninst].marklock2 = dyn->native_size
+#define GETMARKLOCK2 dyn->insts[ninst].marklock2
 
 #define Bxx_gen(OP, M, reg1, reg2) \
     j64 = GET##M - dyn->native_size; \
@@ -768,6 +770,10 @@
 // Branch to MARKLOCK if reg1!=reg2 (use j64)
 #define BNE_MARKLOCK(reg1, reg2) Bxx_gen(NE, MARKLOCK, reg1, reg2)
 // Branch to MARKLOCK if reg1!=0 (use j64)
 #define BNEZ_MARKLOCK(reg) BNE_MARKLOCK(reg, xZR)
+// Branch to MARKLOCK2 if reg1!=reg2 (use j64)
+#define BNE_MARKLOCK2(reg1, reg2) Bxx_gen(NE, MARKLOCK2, reg1, reg2)
+// Branch to MARKLOCK2 if reg1!=0 (use j64)
+#define BNEZ_MARKLOCK2(reg) BNE_MARKLOCK2(reg, xZR)
 
 // Branch to NEXT if reg1==reg2 (use j64)
 #define BEQ_NEXT(reg1, reg2) \
```
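`MARKLOCK2`/`GETMARKLOCK2` follow the existing mark pattern: the mark macro records the current `native_size` into a per-instruction slot, and `Bxx_gen` later turns the recorded offset into a PC-relative branch displacement. A minimal model of that arithmetic, with hypothetical names everywhere except the `marklock2` field itself:

```c
#include <stdint.h>

// Minimal model of the mark mechanism; inst_marks_t and branch_disp are
// hypothetical, only the marklock2 slot mirrors the real per-inst struct.
typedef struct {
    uintptr_t marklock2; // set by MARKLOCK2 to the offset of the emitted label
} inst_marks_t;

// What Bxx_gen computes: j64 = GETMARKLOCK2 - dyn->native_size, i.e. the
// (usually negative) displacement from the branch back to the retry label,
// which the emitter then encodes into a B-type instruction.
static int64_t branch_disp(const inst_marks_t *inst, uintptr_t native_size)
{
    return (int64_t)inst->marklock2 - (int64_t)native_size;
}
```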
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index aecea9e9..1368b498 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -112,6 +112,7 @@ typedef struct instruction_rv64_s {
     uintptr_t markf[2];
     uintptr_t markseg;
     uintptr_t marklock;
+    uintptr_t marklock2;
     int pass2choice; // value for choices that are fixed on pass2 for pass3
     uintptr_t natcall;
     uint16_t retn;
```
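For reference, the x86 behaviour all of the above implements can be modelled in a few lines of C: CMPXCHG8B compares EDX:EAX against the 64-bit memory operand, stores ECX:EBX and sets ZF on a match, and otherwise loads the operand into EDX:EAX and clears ZF. This sketch (`cmpxchg8b` is an illustrative name, not box64 code) uses the GCC `__atomic` builtin, which, unlike the LR/SC-on-aligned-container trick in this patch, is only atomic for a naturally aligned `m64`:

```c
#include <stdbool.h>
#include <stdint.h>

// Reference model of x86 LOCK CMPXCHG8B m64. Returns the resulting ZF.
static bool cmpxchg8b(uint64_t *m64, uint32_t *eax, uint32_t *edx,
                      uint32_t ebx, uint32_t ecx)
{
    uint64_t expected = ((uint64_t)*edx << 32) | *eax; // edx:eax
    uint64_t desired  = ((uint64_t)ecx << 32) | ebx;   // ecx:ebx
    if (__atomic_compare_exchange_n(m64, &expected, desired, false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
        return true;               // matched: ecx:ebx stored, ZF=1
    *eax = (uint32_t)expected;     // mismatch: load m64 into edx:eax
    *edx = (uint32_t)(expected >> 32);
    return false;                  // ZF=0
}
```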