diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-09-27 01:45:40 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-26 19:45:40 +0200 |
| commit | 0fb0ac7994e53700d781f406612bd0f96df97933 (patch) | |
| tree | 631a1da93d3a77d5b442cb2635c555d9421e8e6c /src | |
| parent | 24f278c9618509038e6e278586a10a21c35803c6 (diff) | |
| download | box64-0fb0ac7994e53700d781f406612bd0f96df97933.tar.gz box64-0fb0ac7994e53700d781f406612bd0f96df97933.zip | |
[LA64_DYNAREC] Added unaligned support to CMPXCHG8B (#1877)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f0.c | 93 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 14 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 1 |
3 files changed, 79 insertions, 29 deletions
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index bfc918e8..a6cad8dc 100644 --- a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -188,21 +188,29 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; case 0xC7: - switch (rep) { - case 0: + // rep has no impact here + nextop = F8; + switch ((nextop >> 3) & 7) { + case 1: if (rex.w) { INST_NAME("LOCK CMPXCHG16B Gq, Eq"); + if (!la64_scq) { + static int warned = 0; + PASS3(if (!warned) dynarec_log(LOG_INFO, "Warning, LOCK CMPXCHG16B is not well supported on LoongArch without SCQ and issues are expected.\n")); + warned = 1; + } } else { INST_NAME("LOCK CMPXCHG8B Gq, Eq"); } SETFLAGS(X_ZF, SF_SUBSET); - nextop = F8; addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); - if (la64_lbt) { - X64_SET_EFLAGS(xZR, X_ZF); - } else { - ADDI_D(x2, xZR, ~(1 << F_ZF)); - AND(xFlags, xFlags, x2); + UFLAG_IF { + if (la64_lbt) { + X64_SET_EFLAGS(xZR, X_ZF); + } else { + ADDI_D(x2, xZR, ~(1 << F_ZF)); + AND(xFlags, xFlags, x2); + } } if (rex.w) { if (la64_scq) { @@ -215,11 +223,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MV(x5, xRBX); SC_Q(x5, xRCX, wback); BEQZ_MARKLOCK(x5); - if (la64_lbt) { - ADDI_D(x5, xZR, -1); - X64_SET_EFLAGS(x5, X_ZF); - } else { - ORI(xFlags, xFlags, 1 << F_ZF); + UFLAG_IF { + if (la64_lbt) { + ADDI_D(x5, xZR, -1); + X64_SET_EFLAGS(x5, X_ZF); + } else { + ORI(xFlags, xFlags, 1 << F_ZF); + } } B_MARK3_nocond; MARK; @@ -243,11 +253,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNE_MARK(x3, xRDX); ST_D(xRBX, wback, 0); ST_D(xRCX, wback, 8); - if (la64_lbt) { - ADDI_D(x5, xZR, -1); - X64_SET_EFLAGS(x5, X_ZF); - } else { - ORI(xFlags, xFlags, 1 << F_ZF); + UFLAG_IF { + if (la64_lbt) { + ADDI_D(x5, xZR, -1); + X64_SET_EFLAGS(x5, X_ZF); + } else { + ORI(xFlags, xFlags, 1 << F_ZF); + } } B_MARK3_nocond; MARK; @@ -260,23 +272,46 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } else { SMDMB(); - ZEROUP2(x3, xRAX); - SLLI_D(x2, xRDX, 32); - OR(x3, x3, x2); - ZEROUP2(x4, xRBX); - SLLI_D(x2, xRCX, 32); - OR(x4, x4, x2); + BSTRINS_D(x3, xRAX, 31, 0); + BSTRINS_D(x3, xRDX, 63, 32); + BSTRINS_D(x4, xRBX, 31, 0); + BSTRINS_D(x4, xRCX, 63, 32); + ANDI(x2, wback, 7); + BNEZ_MARK2(x2); + // Aligned MARKLOCK; LL_D(x2, wback, 0); BNE_MARK(x2, x3); // EDX_EAX != Ed MV(x5, x4); SC_D(x5, wback, 0); BEQZ_MARKLOCK(x5); - if (la64_lbt) { - ADDI_D(x5, xZR, -1); - X64_SET_EFLAGS(x5, X_ZF); - } else { - ORI(xFlags, xFlags, 1 << F_ZF); + UFLAG_IF { + if (la64_lbt) { + ADDI_D(x5, xZR, -1); + X64_SET_EFLAGS(x5, X_ZF); + } else { + ORI(xFlags, xFlags, 1 << F_ZF); + } + } + B_MARK3_nocond; + MARK2; + // Unaligned + ADDI_W(x5, xZR, 0xFF8); + AND(x5, wback, x5); + MARKLOCK2; + LD_D(x2, wback, 0); + LL_D(x6, x5, 0); + BNE_MARK(x2, x3); // EDX_EAX != Ed + SC_D(x6, x5, 0); + BEQZ_MARKLOCK2(x6); + ST_D(x4, wback, 0); + UFLAG_IF { + if (la64_lbt) { + ADDI_D(x5, xZR, -1); + X64_SET_EFLAGS(x5, X_ZF); + } else { + ORI(xFlags, xFlags, 1 << F_ZF); + } } B_MARK3_nocond; MARK; diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 89cc8249..21f3c2af 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -441,6 +441,8 @@ #define GETMARKSEG dyn->insts[ninst].markseg #define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size #define GETMARKLOCK dyn->insts[ninst].marklock +#define MARKLOCK2 dyn->insts[ninst].marklock2 = dyn->native_size +#define GETMARKLOCK2 dyn->insts[ninst].marklock2 #define Bxx_gen(OP, M, reg1, reg2) \ j64 = GET##M - dyn->native_size; \ @@ -462,6 +464,8 @@ #define BNE_MARK3(reg1, reg2) Bxx_gen(NE, MARK3, reg1, reg2) // Branch to MARKLOCK if reg1!=reg2 (use j64) #define BNE_MARKLOCK(reg1, reg2) Bxx_gen(NE, MARKLOCK, reg1, reg2) +// Branch to MARKLOCK2 if reg1!=reg2 (use j64) +#define BNE_MARKLOCK2(reg1, reg2) Bxx_gen(NE, MARKLOCK2, reg1, reg2) // Branch to MARK if reg1==reg2 (use j64) #define BEQ_MARK(reg1, reg2) Bxx_gen(EQ, MARK, reg1, reg2) @@ -471,12 +475,16 @@ #define BEQ_MARK3(reg1, reg2) Bxx_gen(EQ, MARK3, reg1, reg2) // Branch to MARKLOCK if reg1==reg2 (use j64) #define BEQ_MARKLOCK(reg1, reg2) Bxx_gen(EQ, MARKLOCK, reg1, reg2) +// Branch to MARKLOCK2 if reg1==reg2 (use j64) +#define BEQ_MARKLOCK2(reg1, reg2) Bxx_gen(EQ, MARKLOCK2, reg1, reg2) // Branch to MARK if reg1==0 (use j64) #define BEQZ_MARK(reg) BxxZ_gen(EQ, MARK, reg) // Branch to MARK2 if reg1==0 (use j64) #define BEQZ_MARK2(reg) BxxZ_gen(EQ, MARK2, reg) // Branch to MARKLOCK if reg1==0 (use j64) #define BEQZ_MARKLOCK(reg) BxxZ_gen(EQ, MARKLOCK, reg) +// Branch to MARKLOCK2 if reg1==0 (use j64) +#define BEQZ_MARKLOCK2(reg) BxxZ_gen(EQ, MARKLOCK2, reg) // Branch to MARK if reg1!=0 (use j64) #define BNEZ_MARK(reg) BxxZ_gen(NE, MARK, reg) @@ -486,6 +494,8 @@ #define BNEZ_MARK3(reg) BxxZ_gen(NE, MARK3, reg) // Branch to MARKLOCK if reg1!=0 (use j64) #define BNEZ_MARKLOCK(reg) BxxZ_gen(NE, MARKLOCK, reg) +// Branch to MARKLOCK2 if reg1!=0 (use j64) +#define BNEZ_MARKLOCK2(reg) BxxZ_gen(NE, MARKLOCK2, reg) // Branch to MARK if fcc!=0 (use j64) #define BCNEZ_MARK(fcc) BCxxZ_gen(NE, MARK, fcc) @@ -495,6 +505,8 @@ #define BCNEZ_MARK3(fcc) BCxxZ_gen(NE, MARK3, fcc) // Branch to MARKLOCK if fcc!=0 (use j64) #define BCNEZ_MARKLOCK(fcc) BxxZ_gen(NE, MARKLOCK, fcc) +// Branch to MARKLOCK2 if fcc!=0 (use j64) +#define BCNEZ_MARKLOCK2(fcc) BxxZ_gen(NE, MARKLOCK2, fcc) // Branch to MARK if fcc==0 (use j64) #define BCEQZ_MARK(fcc) BCxxZ_gen(EQ, MARK, fcc) @@ -504,6 +516,8 @@ #define BCEQZ_MARK3(fcc) BCxxZ_gen(EQ, MARK3, fcc) // Branch to MARKLOCK if fcc==0 (use j64) #define BCEQZ_MARKLOCK(fcc) BxxZ_gen(EQ, MARKLOCK, fcc) +// Branch to MARKLOCK2 if fcc==0 (use j64) +#define BCEQZ_MARKLOCK2(fcc) BxxZ_gen(EQ, MARKLOCK2, fcc) // Branch to MARK if reg1<reg2 (use j64) #define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2) diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 27a93a66..3d65de48 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -78,6 +78,7 @@ typedef struct instruction_la64_s { uintptr_t markf[2]; uintptr_t markseg; uintptr_t marklock; + uintptr_t marklock2; int pass2choice;// value for choices that are fixed on pass2 for pass3 uintptr_t natcall; uint16_t retn; |