diff options
| author | xctan <xctan@cirno.icu> | 2023-05-28 23:17:09 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-28 17:17:09 +0200 |
| commit | 26b01d56f59bc17f48044ce79fbea41a7e2439cb (patch) | |
| tree | 2f4a5c2b808a095b042cec786fee9aa5c922c845 /src | |
| parent | eb687748c5f6cb39b5f3d70721f3e96a3c04549d (diff) | |
| download | box64-26b01d56f59bc17f48044ce79fbea41a7e2439cb.tar.gz box64-26b01d56f59bc17f48044ce79fbea41a7e2439cb.zip | |
[RV64_DYNAREC] Added F3 0F B8 POPCNT opcode and fixed LOCK CMPXCHG16B for ML2 (#808)
* [RV64_DYNAREC] Added lock to F0 48 0F C7 LOCK CMPXCHG16B opcode
* [RV64_DYNAREC] Added F3 0F B8 POPCNT opcode
* [RV64_DYNAREC] Moved 16B lock to box64context
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 14 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 42 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 4 | ||||
| -rwxr-xr-x | src/include/box64context.h | 3 |
4 files changed, 61 insertions, 2 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index cafca95d..6bb11fbd 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -283,9 +283,16 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); ANDI(xFlags, xFlags, ~(1<<F_ZF)); if (rex.w) { - // there is no atomic move on 16bytes, so faking it + // there is no atomic move on 16bytes, so implement it with mutex + LD(x9, xEmu, offsetof(x64emu_t, context)); + ADDI(x9, x9, offsetof(box64context_t, mutex_16b)); + ADDI(x4, xZR, 1); + MARKLOCK; + AMOSWAP_W(x4, x4, x9, 1, 1); + // x4 == 1 if locked + BNEZ_MARKLOCK(x4); + SMDMB(); - // MARKLOCK; LD(x2, wback, 0); LD(x3, wback, 8); BNE_MARK(x2, xRAX); @@ -299,6 +306,9 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MV(xRDX, x3); MARK3; SMDMB(); + + // unlock + AMOSWAP_W(xZR, xZR, x9, 1, 1); } else { SMDMB(); MARKLOCK; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index bdb8fa3b..cbf4c555 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -298,6 +298,48 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SW(x3, gback, i*4); } break; + case 0xB8: + INST_NAME("POPCNT Gd, Ed"); + SETFLAGS(X_ALL, SF_SET); + SET_DFNONE(); + nextop = F8; + GETED(0); + GETGD; + if(!rex.w && MODREG) { + AND(x4, ed, xMASK); + ed = x4; + } + CLEAR_FLAGS(); + BNE_MARK(ed, xZR); + ORI(xFlags, xFlags, 1<<F_ZF); + MOV32w(gd, 0); + B_NEXT_nocond; + MARK; + if(rv64_zbb) { + CPOPxw(gd, ed); + } else { + TABLE64(x1, 0x5555555555555555uLL); + SRLI(x5, ed, 1); + AND(x5, x5, x1); + SUB(x5, ed, x5); + TABLE64(x3, 0x3333333333333333uLL); + SRLI(x1, x5, 2); + AND(x1, x1, x3); + AND(x5, x5, x3); + ADD(x5, x5, x1); + TABLE64(x3, 0x0F0F0F0F0F0F0F0FuLL); + SRLI(x1, x5, 
4); + ADD(x5, x5, x1); + AND(x5, x5, x3); + SRLI(x1, x5, 32); + ADDW(x5, x5, x1); + SRLIW(x1, x5, 16); + ADDW(x5, x5, x1); + SRLIW(x1, x5, 8); + ADDW(x5, x5, x1); + ANDI(gd, x5, 0x7F); + } + break; case 0xBC: INST_NAME("TZCNT Gd, Ed"); SETFLAGS(X_ZF, SF_SUBSET); diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index bc737cc0..35c8bb48 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -360,6 +360,8 @@ f28–31 ft8–11 FP temporaries Caller #define LR_W(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111)) #define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOSWAP_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) + // RV64A #define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111)) #define SC_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) @@ -367,6 +369,8 @@ f28–31 ft8–11 FP temporaries Caller #define LRxw(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010|rex.w, rd, 0b0101111)) #define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010|rex.w, rd, 0b0101111)) +#define AMOSWAP_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) + // RV32F // Read round mode #define FRRM(rd) CSRRS(rd, xZR, 0x002) diff --git a/src/include/box64context.h b/src/include/box64context.h index fb09c004..9b38fa83 100755 --- a/src/include/box64context.h +++ b/src/include/box64context.h @@ -151,6 +151,9 @@ typedef struct box64context_s { uintptr_t max_db_size; // the biggest (in x86_64 instructions bytes) built dynablock int trace_dynarec; pthread_mutex_t mutex_lock; // this is for the Test interpreter + #ifdef __riscv + uint32_t mutex_16b; + #endif #endif library_t *libclib; // shortcut to libc library (if loaded, so probably yes) |