diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-05-26 16:39:19 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-26 10:39:19 +0200 |
| commit | abd1ebb425d23e693847b5796ab207453e181bd6 (patch) | |
| tree | 1b99cce25bb60984812e664a857fbb7b36c97af4 /src | |
| parent | e994c651167c9ae3fab34ff5ffbeb8823baeffe9 (diff) | |
| download | box64-abd1ebb425d23e693847b5796ab207453e181bd6.tar.gz box64-abd1ebb425d23e693847b5796ab207453e181bd6.zip | |
[RV64_DYNAREC] Minor nativeflags optim to LEA and CMOVcc opcodes (#2669)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_67.c | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 93 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 42 |
6 files changed, 118 insertions, 28 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 7551cce4..ec484941 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -495,6 +495,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (MODREG) { // reg <= reg? that's an invalid operation DEFAULT; } else { // mem <= reg + SCRATCH_USAGE(0); addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0); if (gd != ed) { MVxw(gd, ed); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 9429b701..40de22ce 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -937,11 +937,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (MODREG) { \ ed = TO_NAT((nextop & 7) + (rex.b << 3)); \ if (dyn->insts[ninst].nat_flags_fusion) { \ - NATIVEJUMP(NATNO, 8); \ + NATIVEMV(NATYES, gd, ed); \ } else { \ - B##NO(tmp1, 8); \ + MV##YES(gd, ed, tmp1); \ } \ - MV(gd, ed); \ if (!rex.w) ZEROUP(gd); \ } else { \ addr = geted(dyn, addr, ninst, nextop, &ed, tmp2, tmp3, &fixedaddress, rex, NULL, 1, 0); \ diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index aeb6535d..fcc1a1e9 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ b/src/dynarec/rv64/dynarec_rv64_67.c @@ -739,6 +739,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (MODREG) { // reg <= reg? 
that's an invalid operation DEFAULT; } else { // mem <= reg + SCRATCH_USAGE(0); addr = geted32(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0); ZEXTW2(gd, ed); } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 8de757df..a6a047ca 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -66,7 +66,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, } else { if (sib >> 6) { SLLI(ret, TO_NAT(sib_reg), (sib >> 6)); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); } else ret = TO_NAT(sib_reg); *fixaddress = tmp; @@ -79,12 +79,24 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, break; } MOV64x(ret, tmp); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); } } else { if (sib_reg != 4) { - ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch); - SCRATCH_USAGE(1); + if (!(sib >> 6)) { + ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); + SCRATCH_USAGE(!IS_GPR(ret)); + } else if (rv64_zba) { + SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); + SCRATCH_USAGE(!IS_GPR(ret)); + } else if (rv64_xtheadba) { + TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); + SCRATCH_USAGE(!IS_GPR(ret)); + } else { + SLLI(scratch, TO_NAT(sib_reg), sib >> 6); + ADD(ret, TO_NAT(sib_reg2), scratch); + SCRATCH_USAGE(1); + } } else { ret = TO_NAT(sib_reg2); } @@ -102,23 +114,24 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, SCRATCH_USAGE(1); } else if (adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) { ADDI(ret, xRIP, tmp + adj); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); } else if ((tmp >= -2048) && (tmp <= maxval)) { GETIP(addr + delta, scratch); ADDI(ret, xRIP, tmp); SCRATCH_USAGE(1); } else if (tmp + addr + delta < 0x100000000LL) { MOV64x(ret, tmp + addr + delta); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); } else { if (adj) { MOV64x(ret, 
tmp + adj); + SCRATCH_USAGE(!IS_GPR(ret)); } else { MOV64x(ret, tmp); GETIP(addr + delta, scratch); + SCRATCH_USAGE(1); } ADD(ret, ret, xRIP); - SCRATCH_USAGE(1); } switch (lock) { case 1: addLockAddress(addr + delta + tmp); break; @@ -146,8 +159,20 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, *fixaddress = i64; if ((nextop & 7) == 4) { if (sib_reg != 4) { - ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch); - SCRATCH_USAGE(1); + if (!(sib >> 6)) { + ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); + SCRATCH_USAGE(!IS_GPR(ret)); + } else if (rv64_zba) { + SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); + SCRATCH_USAGE(!IS_GPR(ret)); + } else if (rv64_xtheadba) { + TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); + SCRATCH_USAGE(!IS_GPR(ret)); + } else { + SLLI(scratch, TO_NAT(sib_reg), sib >> 6); + ADD(ret, TO_NAT(sib_reg2), scratch); + SCRATCH_USAGE(1); + } } else { ret = TO_NAT(sib_reg2); } @@ -157,30 +182,43 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (i64 >= -2048 && i64 <= 2047) { if ((nextop & 7) == 4) { if (sib_reg != 4) { - ADDSL(scratch, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch); + if (!(sib >> 6)) { + ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); + SCRATCH_USAGE(!IS_GPR(ret)); + } else if (rv64_zba) { + SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); + SCRATCH_USAGE(!IS_GPR(ret)); + } else if (rv64_xtheadba) { + TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); + SCRATCH_USAGE(!IS_GPR(ret)); + } else { + SLLI(scratch, TO_NAT(sib_reg), sib >> 6); + ADD(ret, TO_NAT(sib_reg2), scratch); + SCRATCH_USAGE(1); + } + ADDI(ret, ret, i64); } else { - scratch = TO_NAT(sib_reg2); + ADDI(ret, TO_NAT(sib_reg2), i64); + SCRATCH_USAGE(!IS_GPR(ret)); } - } else - scratch = TO_NAT((nextop & 0x07) + (rex.b << 3)); - ADDI(ret, scratch, i64); - SCRATCH_USAGE(1); + } else { + ADDI(ret, TO_NAT((nextop & 0x07) + (rex.b << 3)), i64); + 
SCRATCH_USAGE(!IS_GPR(ret)); + } } else { MOV64x(scratch, i64); + SCRATCH_USAGE(1); if ((nextop & 7) == 4) { if (sib_reg != 4) { ADD(scratch, scratch, TO_NAT(sib_reg2)); ADDSL(ret, scratch, TO_NAT(sib_reg), sib >> 6, ret); - SCRATCH_USAGE(1); } else { PASS3(int tmp = TO_NAT(sib_reg2)); ADD(ret, tmp, scratch); - SCRATCH_USAGE(1); } } else { PASS3(int tmp = TO_NAT((nextop & 0x07) + (rex.b << 3))); ADD(ret, tmp, scratch); - SCRATCH_USAGE(1); } } } @@ -226,7 +264,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ } else { if (sib >> 6) { SLLI(ret, TO_NAT(sib_reg), (sib >> 6)); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); } else ret = TO_NAT(sib_reg); *fixaddress = tmp; @@ -239,18 +277,19 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ break; } MOV32w(ret, tmp); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); } } else { if (sib_reg != 4) { if ((sib >> 6)) { SLLI(scratch, TO_NAT(sib_reg), (sib >> 6)); ADDW(ret, scratch, TO_NAT(sib_reg2)); + SCRATCH_USAGE(1); } else { ADDW(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); + SCRATCH_USAGE(!IS_GPR(ret)); } ZEROUP(ret); - SCRATCH_USAGE(1); } else { ret = TO_NAT(sib_reg2); } @@ -258,7 +297,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ } else if ((nextop & 7) == 5) { uint32_t tmp = F32; MOV32w(ret, tmp); - SCRATCH_USAGE(1); + SCRATCH_USAGE(!IS_GPR(ret)); switch (lock) { case 1: addLockAddress(tmp); break; case 2: @@ -291,11 +330,12 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ if (sib >> 6) { SLLI(scratch, TO_NAT(sib_reg), (sib >> 6)); ADDW(ret, scratch, TO_NAT(sib_reg2)); + SCRATCH_USAGE(1); } else { ADDW(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); + SCRATCH_USAGE(!IS_GPR(ret)); } ZEROUP(ret); - SCRATCH_USAGE(1); } else { ret = TO_NAT(sib_reg2); } @@ -311,14 +351,17 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ ADDW(scratch, scratch, 
TO_NAT(sib_reg2)); } else ADDW(scratch, TO_NAT(sib_reg2), TO_NAT(sib_reg)); + SCRATCH_USAGE(1); } else { scratch = TO_NAT(sib_reg2); + SCRATCH_USAGE(!IS_GPR(ret)); } - } else + } else { scratch = TO_NAT(nextop & 0x07); + SCRATCH_USAGE(!IS_GPR(ret)); + } ADDIW(ret, scratch, i32); ZEROUP(ret); - SCRATCH_USAGE(1); } else { // no need to zero up, as we did it below rv64_move32(dyn, ninst, scratch, i32, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 37e8518c..0fd8cf75 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1823,6 +1823,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #define B__safe(a, b, c) XOR(xZR, xZR, xZR) #define B_(a, b, c) XOR(xZR, xZR, xZR) #define S_(a, b, c) XOR(xZR, xZR, xZR) +#define MV_(a, b, c, d) XOR(xZR, xZR, xZR) #define NATIVEJUMP_safe(COND, val) \ B##COND##_safe(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val); @@ -1833,6 +1834,9 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #define NATIVESET(COND, rd) \ S##COND(rd, dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2); +#define NATIVEMV(COND, rd, rs) \ + MV##COND(rd, rs, dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2); + #define NOTEST(s1) \ if (BOX64ENV(dynarec_test)) { \ SW(xZR, xEmu, offsetof(x64emu_t, test.test)); \ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 9dd094f7..27ae0427 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -251,6 +251,48 @@ #define SGTU(rd, rs1, rs2) SLTU(rd, rs2, rs1); #define SLEU(rd, rs1, rs2) SGEU(rd, rs2, rs1); +#define MVEQ(rd, rs1, rs2, rs3) \ + if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ + TH_MVEQZ(rd, rs1, ((rs2 == xZR) ? 
rs3 : rs2)); \ + } else { \ + BNE(rs2, rs3, 8); \ + MV(rd, rs1); \ + } +#define MVNE(rd, rs1, rs2, rs3) \ + if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ + TH_MVNEZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ + } else { \ + BEQ(rs2, rs3, 8); \ + MV(rd, rs1); \ + } +#define MVLT(rd, rs1, rs2, rs3) \ + BGE(rs2, rs3, 8); \ + MV(rd, rs1); +#define MVGE(rd, rs1, rs2, rs3) \ + BLT(rs2, rs3, 8); \ + MV(rd, rs1); +#define MVLTU(rd, rs1, rs2, rs3) \ + BGEU(rs2, rs3, 8); \ + MV(rd, rs1); +#define MVGEU(rd, rs1, rs2, rs3) \ + BLTU(rs2, rs3, 8); \ + MV(rd, rs1); +#define MVGT(rd, rs1, rs2, rs3) \ + BGE(rs3, rs2, 8); /* signed compare: skip the MV when rs2 <= rs3 (BGEU is only right for MVGTU) */ \ + MV(rd, rs1); +#define MVLE(rd, rs1, rs2, rs3) \ + BLT(rs3, rs2, 8); \ + MV(rd, rs1); +#define MVGTU(rd, rs1, rs2, rs3) \ + BGEU(rs3, rs2, 8); \ + MV(rd, rs1); +#define MVLEU(rd, rs1, rs2, rs3) \ + BLTU(rs3, rs2, 8); \ + MV(rd, rs1); + +#define MVEQZ(rd, rs1, rs2) MVEQ(rd, rs1, rs2, xZR) +#define MVNEZ(rd, rs1, rs2) MVNE(rd, rs1, rs2, xZR) + #define BEQ_safe(rs1, rs2, imm) \ if ((imm) > -0x1000 && (imm) < 0x1000) { \ BEQ(rs1, rs2, imm); \ |