diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-11-13 21:22:25 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-13 14:22:25 +0100 |
| commit | 506cb980b10b8850c9a2aaac1e4d97104617ba15 (patch) | |
| tree | a8835f6427387ca4548138192e1311f0672725bb /src | |
| parent | 937e2cf853255dd636388134c882c6277ce74552 (diff) | |
| download | box64-506cb980b10b8850c9a2aaac1e4d97104617ba15.tar.gz box64-506cb980b10b8850c9a2aaac1e4d97104617ba15.zip | |
[RV64_DYNAREC] Prefer AMO* instructions over LR/SC when possible (#2028)
* [RV64_DYNAREC] Prefer AMO* instructions over LR/SC when possible * fixes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 22 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 160 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 26 |
4 files changed, 87 insertions, 123 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 3390be54..c31d760a 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -270,7 +270,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } break; case 0x87: - INST_NAME("(LOCK)XCHG Ed, Gd"); + INST_NAME("(LOCK) XCHG Ed, Gd"); nextop = F8; if(MODREG) { GETGD; @@ -282,19 +282,23 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); SMDMB(); - ANDI(x3, ed, (1<<(2+rex.w))-1); + ANDI(x3, ed, (1 << (2 + rex.w)) - 1); BNE_MARK(x3, xZR); - MARKLOCK; - LRxw(x1, ed, 1, 0); - SCxw(x3, gd, ed, 0, 1); - BNE_MARKLOCK(x3, xZR); - B_MARK2_nocond; + AMOSWAPxw(gd, gd, ed, 1, 1); + if (!rex.w) ZEROUP(gd); + B_NEXT_nocond; MARK; + // Unaligned + ANDI(x5, ed, -(1 << (rex.w + 2))); LDxw(x1, ed, 0); + MARKLOCK; + LDxw(x1, wback, 0); + LRxw(x3, x5, 1, 1); + SCxw(x4, x3, x5, 1, 1); + BNEZ_MARKLOCK(x4); SDxw(gd, ed, 0); - MARK2; - SMDMB(); MVxw(gd, x1); + SMDMB(); } break; case 0x88: diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index a4df72f7..f5559843 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -561,7 +561,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_test16(dyn, ninst, x1, x2, x6, x4, x5); break; case 0x87: - INST_NAME("(LOCK)XCHG Ew, Gw"); + INST_NAME("(LOCK) XCHG Ew, Gw"); nextop = F8; if(MODREG) { GETGD; diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index 3e504ca6..fc8c46ad 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -64,11 +64,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5); } else { addr 
= geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - LRxw(x1, wback, 1, 1); - ADDxw(x4, x1, gd); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + AMOADDxw(x1, gd, wback, 1, 1); IFX(X_ALL|X_PEND) { emit_add32(dyn, ninst, rex, x1, gd, x3, x4, x5); } @@ -86,11 +82,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_or32(dyn, ninst, rex, ed, gd, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - LRxw(x1, wback, 1, 1); - OR(x4, x1, gd); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + AMOORxw(x1, gd, wback, 1, 1); IFX(X_ALL|X_PEND) emit_or32(dyn, ninst, rex, x1, gd, x3, x4); } @@ -264,11 +256,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - LRxw(x1, wback, 1, 1); - ADDxw(x4, x1, gd); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + AMOADDxw(x1, gd, wback, 1, 1); IFX(X_ALL|X_PEND) { MVxw(x2, x1); emit_add32(dyn, ninst, rex, x2, gd, x3, x4, x5); @@ -463,11 +451,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_and32(dyn, ninst, rex, ed, gd, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - LRxw(x1, wback, 1, 1); - AND(x4, x1, gd); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + AMOANDxw(x1, gd, wback, 1, 1); IFX(X_ALL|X_PEND) emit_and32(dyn, ninst, rex, x1, gd, x3, x4); } @@ -484,11 +468,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_sub32(dyn, ninst, rex, ed, gd, x3, x4, x5); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - LRxw(x1, wback, 1, 1); - SUB(x4, x1, gd); - SCxw(x3, 
x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + SUBxw(x4, xZR, gd); + AMOADDxw(x1, x4, wback, 1, 1); IFX(X_ALL|X_PEND) emit_sub32(dyn, ninst, rex, x1, gd, x3, x4, x5); } @@ -517,11 +498,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x3, wback, ~3); // aligned addr ADDI(x1, xZR, u8); SLL(x1, x1, x2); // Ib << offset - MARKLOCK; - LR_W(x4, x3, 1, 1); - OR(x6, x4, x1); - SC_W(x6, x6, x3, 1, 1); - BNEZ_MARKLOCK(x6); + AMOORxw(x4, x1, x3, 1, 1); IFX(X_ALL|X_PEND) { SRL(x1, x4, x2); ANDI(x1, x1, 0xFF); @@ -553,32 +530,24 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { SMDMB(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if (i64 < -2048 || i64 >= 2048) - MOV64xw(x9, i64); + if (opcode == 0x81) + i64 = F32S; + else + i64 = F8S; + MOV64xw(x9, i64); ANDI(x1, wback, (1 << (rex.w + 2)) - 1); BNEZ_MARK3(x1); // Aligned - MARKLOCK; - LRxw(x1, wback, 1, 1); - if (i64 >= -2048 && i64 < 2048) - ADDIxw(x4, x1, i64); - else - ADDxw(x4, x1, x9); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + AMOADDxw(x1, x9, wback, 1, 1); B_MARK_nocond; MARK3; // Unaligned ANDI(x5, wback, -(1 << (rex.w + 2))); MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks. 
- LDxw(x6, wback, 0); - LRxw(x1, x5, 1, 1); - if (i64 >= -2048 && i64 < 2048) - ADDIxw(x4, x6, i64); - else - ADDxw(x4, x6, x9); - SCxw(x3, x1, x5, 1, 1); + LDxw(x1, wback, 0); + LRxw(x6, x5, 1, 1); + ADDxw(x4, x1, x9); + SCxw(x3, x6, x5, 1, 1); BNEZ_MARK2(x3); SDxw(x4, wback, 0); MARK; @@ -600,19 +569,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if (i64 < -2048 || i64 >= 2048) - MOV64xw(x9, i64); - MARKLOCK; - LRxw(x1, wback, 1, 1); - if (i64 >= -2048 && i64 < 2048) { - ORI(x4, x1, i64); - } else { - OR(x4, x1, x9); - } - if (!rex.w) ZEROUP(x4); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + if (opcode == 0x81) + i64 = F32S; + else + i64 = F8S; + MOV64xw(x4, i64); + AMOORxw(x1, x4, wback, 1, 1); IFX(X_ALL|X_PEND) emit_or32c(dyn, ninst, rex, x1, i64, x3, x4); } @@ -630,19 +592,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if (i64 < -2048 || i64 >= 2048) - MOV64xw(x9, i64); - MARKLOCK; - LRxw(x1, wback, 1, 1); - if (i64 >= -2048 && i64 < 2048) { - ANDI(x4, x1, i64); - } else { - AND(x4, x1, x9); - } - if (!rex.w) ZEROUP(x4); - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + if (opcode == 0x81) + i64 = F32S; + else + i64 = F8S; + MOV64xw(x9, i64); + AMOANDxw(x1, x9, wback, 1, 1); IFX(X_ALL|X_PEND) emit_and32c(dyn, ninst, rex, x1, i64, x3, x4); } @@ -660,34 +615,25 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); } else { addr = geted(dyn, addr, ninst, nextop, &wback, 
x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); - if(opcode==0x81) i64 = F32S; else i64 = F8S; - if (i64 <= -2048 || i64 > 2048) - MOV64xw(x9, i64); + if (opcode == 0x81) + i64 = F32S; + else + i64 = F8S; + MOV64xw(x9, i64); ANDI(x1, wback, (1 << (rex.w + 2)) - 1); BNEZ_MARK3(x1); // Aligned - MARKLOCK; - LRxw(x1, wback, 1, 1); - if (i64 > -2048 && i64 <= 2048) { - ADDIxw(x4, x1, -i64); - } else { - SUBxw(x4, x1, x9); - } - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + SUB(x4, xZR, x9); + AMOADDxw(x1, x4, wback, 1, 1); B_MARK_nocond; MARK3; // Unaligned ANDI(x5, wback, -(1 << (rex.w + 2))); MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks. - LDxw(x6, wback, 0); - LRxw(x1, x5, 1, 1); - if (i64 > -2048 && i64 <= 2048) { - ADDIxw(x4, x6, -i64); - } else { - SUBxw(x4, x6, x9); - } - SCxw(x3, x1, x5, 1, 1); + LDxw(x1, wback, 0); + LRxw(x6, x5, 1, 1); + SUBxw(x4, x1, x9); + SCxw(x3, x6, x5, 1, 1); BNEZ_MARK2(x3); SDxw(x4, wback, 0); MARK; @@ -716,17 +662,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F32S; else i64 = F8S; - if (i64 < -2048 || i64 >= 2048) - MOV64xw(x9, i64); - MARKLOCK; - LRxw(x1, wback, 1, 1); - if (i64 >= -2048 && i64 < 2048) { - XORI(x4, x1, i64); - } else { - XOR(x4, x1, x9); - } - SCxw(x3, x4, wback, 1, 1); - BNEZ_MARKLOCK(x3); + MOV64xw(x9, i64); + AMOXORxw(x1, x9, wback, 1, 1); IFX(X_ALL | X_PEND) emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4); } @@ -751,21 +688,18 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x1, wback, (1 << (rex.w + 2)) - 1); BNEZ_MARK3(x1); // Aligned - MARKLOCK; - LRxw(x1, wback, 1, 1); - SCxw(x4, gd, wback, 1, 1); - BNEZ_MARKLOCK(x4); - B_MARK_nocond; + AMOSWAPxw(gd, gd, wback, 1, 1); + if (!rex.w) ZEROUP(gd); + B_NEXT_nocond; MARK3; // Unaligned ANDI(x5, wback, -(1 << (rex.w + 2))); - MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks. 
+ MARKLOCK; LDxw(x1, wback, 0); LRxw(x3, x5, 1, 1); SCxw(x4, x3, x5, 1, 1); - BNEZ_MARK2(x4); + BNEZ_MARKLOCK(x4); SDxw(gd, wback, 0); - MARK; MVxw(gd, x1); SMDMB(); } diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index f56b68e7..4e199e00 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -561,6 +561,14 @@ f28–31 ft8–11 FP temporaries Caller #define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) #define AMOSWAP_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOADD_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOXOR_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOAND_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b01100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOOR_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b01000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOMIN_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b10000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOMAX_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b10100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOMINU_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11000, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOMAXU_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11100, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) // RV64A #define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111)) @@ -570,6 +578,24 @@ f28–31 ft8–11 FP temporaries Caller #define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) #define AMOSWAP_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOADD_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define 
AMOXOR_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOAND_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b01100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOOR_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b01000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOMIN_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b10000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOMAX_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b10100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOMINU_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11000, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define AMOMAXU_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11100, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) + +#define AMOSWAPxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOADDxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOXORxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOANDxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b01100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOORxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b01000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOMINxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b10000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOMAXxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b10100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOMINUxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11000, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define AMOMAXUxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b11100, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) // RV32F // Read round mode |