From 2798c3c8cde74ee98b7e5144303905eee0c638ae Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Fri, 16 May 2025 20:12:56 +0800
Subject: [RV64_DYNAREC] Enabled native flags optimization for SETcc opcodes (#2640)

---
Review notes (free-form area, not part of the commit message or the diff):

 * This copy had been collapsed onto a few very long lines. The line structure
   below is restored from the hunk headers and the diffstat; the original
   indentation and the column alignment of the trailing `\` continuations
   could not be recovered, so apply with whitespace tolerance.
 * dynarec_rv64_0f.c: the SETcc `GO` macro now starts with
   READFLAGS_FUSION(F, x1, x2, x3, x4, x5) and works on tmp1/tmp2/tmp3 instead
   of hard-coded x1/x2/x3. When dyn->insts[ninst].nat_flags_fusion is set it
   skips GETFLAGS and emits NATIVESET(NATYES, tmp3); otherwise it keeps the
   previous GETFLAGS + S##YES(tmp3, tmp1) sequence. The result is then merged
   into the low byte (or, when eb2 is non-zero, bits 8..15) of the destination
   register, or stored with SB for a memory operand.
 * dynarec_rv64_helper.h: adds the dummy S_() (same placeholder expansion as
   B_()) and NATIVESET(COND, rd), which expands to
   S##COND(rd, nat_flags_op1, nat_flags_op2); -- note that NATIVESET itself
   supplies the terminating semicolon.
 * dynarec_rv64_pass0.h: the removed and added lines are token-identical in
   this rendering; presumably a whitespace-only realignment (to be confirmed
   against the upstream commit).
 * rv64_emitter.h: adds SEQ/SNE/SGE/SGEU, each with shortcuts when an operand
   is xZR, and the operand-swapped aliases SGT/SLE/SGTU/SLEU.
 * Changed relative to the commit named in the first line: the four alias
   macros SGT/SLE/SGTU/SLEU no longer end in `;`, matching the neighbouring
   BGTU/BLEU aliases. With the semicolon baked in, `SLE(...);` expanded to two
   statements, which is unsafe in an unbraced if/else. Line counts are
   unchanged, so the diffstat and hunk headers still hold.

 src/dynarec/rv64/dynarec_rv64_0f.c     | 60 ++++++++++++++++++----------------
 src/dynarec/rv64/dynarec_rv64_helper.h |  4 +++
 src/dynarec/rv64/dynarec_rv64_pass0.h  |  2 +-
 src/dynarec/rv64/rv64_emitter.h        | 51 +++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 30 deletions(-)

(limited to 'src')

diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 5627707b..ae66e1b2 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1772,35 +1772,37 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         GOCOND(0x80, "J", "Id");
 #undef GO
 
-#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \
-    READFLAGS(F); \
-    tmp1 = x1; \
-    tmp3 = x3; \
-    GETFLAGS; \
-    nextop = F8; \
-    S##YES(x3, x1); \
-    if (MODREG) { \
-        if (rex.rex) { \
-            eb1 = TO_NAT((nextop & 7) + (rex.b << 3)); \
-            eb2 = 0; \
-        } else { \
-            ed = (nextop & 7); \
-            eb2 = (ed >> 2) * 8; \
-            eb1 = TO_NAT(ed & 3); \
-        } \
-        if (eb2) { \
-            LUI(x1, 0xffff0); \
-            ORI(x1, x1, 0xff); \
-            AND(eb1, eb1, x1); \
-            SLLI(x3, x3, 8); \
-        } else { \
-            ANDI(eb1, eb1, 0xf00); \
-        } \
-        OR(eb1, eb1, x3); \
-    } else { \
-        addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); \
-        SB(x3, ed, fixedaddress); \
-        SMWRITE(); \
+#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \
+    READFLAGS_FUSION(F, x1, x2, x3, x4, x5); \
+    if (!dyn->insts[ninst].nat_flags_fusion) { GETFLAGS; } \
+    nextop = F8; \
+    if (dyn->insts[ninst].nat_flags_fusion) { \
+        NATIVESET(NATYES, tmp3); \
+    } else { \
+        S##YES(tmp3, tmp1); \
+    } \
+    if (MODREG) { \
+        if (rex.rex) { \
+            eb1 = TO_NAT((nextop & 7) + (rex.b << 3)); \
+            eb2 = 0; \
+        } else { \
+            ed = (nextop & 7); \
+            eb2 = (ed >> 2) * 8; \
+            eb1 = TO_NAT(ed & 3); \
+        } \
+        if (eb2) { \
+            LUI(tmp1, 0xffff0); \
+            ORI(tmp1, tmp1, 0xff); \
+            AND(eb1, eb1, tmp1); \
+            SLLI(tmp3, tmp3, 8); \
+        } else { \
+            ANDI(eb1, eb1, 0xf00); \
+        } \
+        OR(eb1, eb1, tmp3); \
+    } else { \
+        addr = geted(dyn, addr, ninst, nextop, &ed, tmp2, tmp1, &fixedaddress, rex, NULL, 1, 0); \
+        SB(tmp3, ed, fixedaddress); \
+        SMWRITE(); \
     }
         GOCOND(0x90, "SET", "Eb");
 #undef GO
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 900245fb..5eb8ac69 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1814,6 +1814,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
 // Dummy macros
 #define B__safe(a, b, c) XOR(xZR, xZR, xZR)
 #define B_(a, b, c) XOR(xZR, xZR, xZR)
+#define S_(a, b, c) XOR(xZR, xZR, xZR)
 
 #define NATIVEJUMP_safe(COND, val) \
     B##COND##_safe(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val);
@@ -1821,6 +1822,9 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
 #define NATIVEJUMP(COND, val) \
     B##COND(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val);
 
+#define NATIVESET(COND, rd) \
+    S##COND(rd, dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2);
+
 #define NOTEST(s1) \
     if (BOX64ENV(dynarec_test)) { \
         SW(xZR, xEmu, offsetof(x64emu_t, test.test)); \
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index ed213560..416e8bb0 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -15,7 +15,7 @@
     dyn->f.pending = SF_SET
 
 #define READFLAGS_FUSION(A, s1, s2, s3, s4, s5) \
-    if (BOX64ENV(dynarec_nativeflags) && ninst > 0 && !dyn->insts[ninst - 1].nat_flags_nofusion) { \
+    if (BOX64ENV(dynarec_nativeflags) && ninst > 0 && !dyn->insts[ninst - 1].nat_flags_nofusion) { \
         if ((A) == (X_ZF)) \
             dyn->insts[ninst].nat_flags_fusion = 1; \
         else if (dyn->insts[ninst - 1].nat_flags_carry && ((A) == (X_CF) || (A) == (X_CF | X_ZF))) \
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index e274d9af..c1dc6fc7 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -200,6 +200,57 @@
 #define BGTU(rs1, rs2, imm13) BLTU(rs2, rs1, imm13)
 #define BLEU(rs1, rs2, imm13) BGEU(rs2, rs1, imm13)
 
+#define SEQ(rd, rs1, rs2) \
+    do { \
+        if (rs1 == xZR) { \
+            SEQZ(rd, rs2); \
+        } else if (rs2 == xZR) { \
+            SEQZ(rd, rs1); \
+        } else { \
+            XOR(rd, rs1, rs2); \
+            SEQZ(rd, rd); \
+        } \
+    } while (0)
+
+#define SNE(rd, rs1, rs2) \
+    do { \
+        if (rs1 == xZR) { \
+            SNEZ(rd, rs2); \
+        } else if (rs2 == xZR) { \
+            SNEZ(rd, rs1); \
+        } else { \
+            XOR(rd, rs1, rs2); \
+            SNEZ(rd, rd); \
+        } \
+    } while (0)
+
+#define SGE(rd, rs1, rs2) \
+    do { \
+        if (rs1 == xZR) { \
+            SLTI(rd, rs2, 1); \
+        } else { \
+            SLT(rd, rs1, rs2); \
+            XORI(rd, rd, 1); \
+        } \
+    } while (0)
+
+#define SGEU(rd, rs1, rs2) \
+    do { \
+        if (rs1 == xZR) { \
+            SEQZ(rd, rs2); \
+        } else if (rs2 == xZR) { \
+            ADDI(rd, xZR, 1); \
+        } else { \
+            SLTU(rd, rs1, rs2); \
+            XORI(rd, rd, 1); \
+        } \
+    } while (0)
+
+#define SGT(rd, rs1, rs2) SLT(rd, rs2, rs1)
+#define SLE(rd, rs1, rs2) SGE(rd, rs2, rs1)
+#define SGTU(rd, rs1, rs2) SLTU(rd, rs2, rs1)
+#define SLEU(rd, rs1, rs2) SGEU(rd, rs2, rs1)
+
 #define BEQ_safe(rs1, rs2, imm) \
     if ((imm) > -0x1000 && (imm) < 0x1000) { \
         BEQ(rs1, rs2, imm); \
-- 
cgit 1.4.1