From 64313d5aa6a0bafe84ddda61fe7cd541f81b42e7 Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Mon, 17 Feb 2025 16:16:15 +0800
Subject: [LA64_DYNAREC] Minor optims and fixes to some opcodes (#2371)

---
Review notes (not part of the commit message):

* This patch was re-flowed from a rendering in which all newlines had been
  collapsed. Leading indentation of every line is reconstructed, so apply
  with `git apply --ignore-whitespace` or regenerate from the tree. The
  `index` blob ids below are those of the original revision.
* SHL/SHR Ed, CL fast path (no flags generated): the 32-bit form now masks
  the count to 5 bits before the 64-bit shift, as x86 does; SLL.D/SRL.D
  would otherwise consume 6 bits of CL. SHR additionally zero-extends the
  operand first so bits 63:32 cannot be shifted into the result, which
  makes the later zero-extensions unnecessary on that path.
  NOTE(review): if the emitter exposes SLL.W/SRL.W, those would do the same
  in one instruction - confirm and simplify.
* CMP EAX, Id: emit_cmp32_0() now receives 0xC0 instead of `nextop`. This
  opcode has no ModRM byte, so `nextop` is not set for this instruction,
  while the operand is always a register.
* The dynarec_la64_0f.c (CMOV) change was dropped. It folded the 32-bit
  zero-extension into the move that is skipped when the condition is
  false, but CMOVcc with a 32-bit destination clears bits 63:32 even when
  the condition is false, so the unconditional ZEROUP(gd) has to stay.
* emit_cmp32_0(): header comment updated for the new nextop/s5 parameters.

 src/dynarec/la64/dynarec_la64_00.c         | 65 ++++++++++++++++++------
 src/dynarec/la64/dynarec_la64_64.c         |  4 +-
 src/dynarec/la64/dynarec_la64_emit_tests.c | 82 +++++++++++++++++++++++++++---
 src/dynarec/la64/dynarec_la64_helper.h     |  4 +-
 4 files changed, 129 insertions(+), 26 deletions(-)

(limited to 'src')

diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index ed708062..2b6c65ee 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -477,10 +477,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION);
             i64 = F32S;
             if (i64) {
-                MOV64xw(x2, i64);
+                MOV64x(x2, i64);
                 emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5, x6);
             } else
-                emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4);
+                emit_cmp32_0(dyn, ninst, rex, 0xC0, xRAX, x3, x4, x5); // no ModRM byte here: 0xC0 = register operand
             break;
         case 0x40:
         case 0x41:
@@ -946,15 +946,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     else
                         i64 = F8S;
                     if (i64) {
-                        MOV64xw(x2, i64);
+                        MOV64x(x2, i64);
                         emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6);
-                    } else {
-                        if (!rex.w && MODREG) {
-                            ZEROUP2(x1, ed);
-                            ed = x1;
-                        }
-                        emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
-                    }
+                    } else
+                        emit_cmp32_0(dyn, ninst, rex, nextop, ed, x3, x4, x5);
                     break;
             }
             break;
@@ -1179,7 +1174,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                LD_HU(x1, wback, fixedaddress);
+                LD_HU(x1, ed, fixedaddress);
                 ed = x1;
             }
             ST_H(ed, xEmu, offsetof(x64emu_t, segs[u8]));
@@ -2194,9 +2189,26 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 6:
                     INST_NAME("SHL Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
+                    if (!dyn->insts[ninst].x64.gen_flags) {
+                        GETED(0);
+                        if (rex.w) {
+                            SLL_D(ed, ed, xRCX);
+                        } else {
+                            ANDI(x3, xRCX, 0x1f); // x86 masks 32-bit shift counts to 5 bits
+                            SLL_D(ed, ed, x3);
+                        }
+                        if (dyn->insts[ninst].nat_flags_fusion) {
+                            if (!rex.w) ZEROUP(ed);
+                            NAT_FLAGS_OPS(ed, xZR);
+                        } else if (!rex.w && MODREG) {
+                            ZEROUP(ed);
+                        }
+                        WBACK;
+                        break;
+                    }
                     ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
                     GETED(0);
-                    if (!rex.w && MODREG) { ZEROUP(ed); }
+                    if (!rex.w && MODREG) ZEROUP(ed);
                     CBZ_NEXT(x3);
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6);
                     WBACK;
@@ -2204,9 +2216,22 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 5:
                     INST_NAME("SHR Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
+                    if (!dyn->insts[ninst].x64.gen_flags) {
+                        GETED(0);
+                        if (rex.w) {
+                            SRL_D(ed, ed, xRCX);
+                        } else {
+                            ANDI(x3, xRCX, 0x1f); // x86 masks 32-bit shift counts to 5 bits
+                            ZEROUP(ed);           // keep bits 63:32 out of the shifted result
+                            SRL_D(ed, ed, x3);
+                        }
+                        if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(ed, xZR);
+                        WBACK;
+                        break;
+                    }
                     ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
                     GETED(0);
-                    if (!rex.w && MODREG) { ZEROUP(ed); }
+                    if (!rex.w && MODREG) ZEROUP(ed);
                     CBZ_NEXT(x3);
                     emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
@@ -2498,10 +2523,18 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 1:
                     INST_NAME("TEST Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION);
-                    GETEB(x1, 1);
+                    if (MODREG && (rex.rex || (((nextop & 7) >> 2) == 0))) {
+                        // quick path for low 8bit registers
+                        if (rex.rex)
+                            ed = TO_NAT((nextop & 7) + (rex.b << 3));
+                        else
+                            ed = TO_NAT(nextop & 3);
+                    } else {
+                        GETEB(x1, 1);
+                        ed = x1;
+                    }
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
+                    emit_test8c(dyn, ninst, ed, u8, x3, x4, x5);
                     break;
                 case 2:
                     INST_NAME("NOT Eb");
diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c
index 04633316..2af080ef 100644
--- a/src/dynarec/la64/dynarec_la64_64.c
+++ b/src/dynarec/la64/dynarec_la64_64.c
@@ -323,10 +323,10 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     else
                         i64 = F8S;
                     if (i64) {
-                        MOV64xw(x2, i64);
+                        MOV64x(x2, i64);
                         emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6);
                     } else
-                        emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
+                        emit_cmp32_0(dyn, ninst, rex, nextop, ed, x3, x4, x5);
                     break;
             }
             break;
diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c b/src/dynarec/la64/dynarec_la64_emit_tests.c
index 687a3f80..ea04f36a 100644
--- a/src/dynarec/la64/dynarec_la64_emit_tests.c
+++ b/src/dynarec/la64/dynarec_la64_emit_tests.c
@@ -354,7 +354,7 @@ void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 }
 
-// emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch
-void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4)
+// emit CMP32 instruction, from cmp s1, 0, using s3, s4 and s5 as scratch; nextop is the ModRM byte tested by MODREG
+void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, uint8_t nextop, int s1, int s3, int s4, int s5)
 {
     IFX_PENDOR0 {
         ST_D(s1, xEmu, offsetof(x64emu_t, op1));
@@ -380,10 +380,13 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
             else {
                 if (dyn->insts[ninst].nat_flags_needsign) {
                     SEXT_W(s3, s1);
-                } else {
+                    NAT_FLAGS_OPS(s3, xZR);
+                } else if (MODREG) {
                     ZEROUP2(s3, s1);
+                    NAT_FLAGS_OPS(s3, xZR);
+                } else {
+                    NAT_FLAGS_OPS(s1, xZR);
                 }
-                NAT_FLAGS_OPS(s3, xZR);
             }
         }
         return;
@@ -399,12 +402,19 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
         }
         ORI(xFlags, xFlags, 1 << F_SF);
     }
+    int res = s1;
+    IFX (X_ZF | X_PF) {
+        if (!rex.w && MODREG) {
+            ZEROUP2(s5, s1);
+            res = s5;
+        }
+    }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
+        BNEZ(res, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
-        emit_pf(dyn, ninst, s1, s3, s4);
+        emit_pf(dyn, ninst, res, s3, s4);
     }
     if (dyn->insts[ninst].nat_flags_fusion) {
         if (rex.w)
@@ -412,10 +422,13 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
         else {
             if (dyn->insts[ninst].nat_flags_needsign) {
                 SEXT_W(s3, s1);
+                NAT_FLAGS_OPS(s3, xZR);
+            } else if (res == s5) { // zero-up'd case
+                NAT_FLAGS_OPS(res, xZR);
             } else {
                 ZEROUP2(s3, s1);
+                NAT_FLAGS_OPS(s3, xZR);
             }
-            NAT_FLAGS_OPS(s3, xZR);
         }
     }
 }
@@ -474,6 +487,61 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
 }
 
+// emit TEST8 instruction, from test s1, c, using s3, s4 and s5 as scratch
+void emit_test8c(dynarec_la64_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5)
+{
+    IFX_PENDOR0 {
+        SET_DF(s3, d_tst8);
+    } else {
+        SET_DFNONE();
+    }
+
+    NAT_FLAGS_ENABLE_SIGN();
+    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR);
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            ADDI_D(s3, xZR, c);
+            X64_AND_B(s1, s3);
+        }
+
+        if (dyn->insts[ninst].nat_flags_fusion) {
+            ANDI(s3, s1, c);
+            if (dyn->insts[ninst].nat_flags_needsign) {
+                EXT_W_B(s3, s3);
+            }
+        }
+
+        IFX_PENDOR0 {
+            if (!dyn->insts[ninst].nat_flags_fusion) ANDI(s3, s1, c);
+            ST_B(s3, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    ANDI(s3, s1, c); // res = s1 & c
+
+    IFX_PENDOR0 {
+        ST_D(s3, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_SF) {
+        SRLI_D(s4, s3, 7);
+        BEQZ(s4, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_ZF) {
+        BNEZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s3, s4, s5);
+    }
+    if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) {
+        EXT_W_B(s3, s3);
+    }
+}
+
 // emit TEST16 instruction, from test s1, s2, using s3, s4 and s5 as scratch
 void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index fc5ce861..b912fe8a 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -865,6 +865,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_cmp8           STEPNAME(emit_cmp8)
 #define emit_cmp8_0         STEPNAME(emit_cmp8_0)
 #define emit_test8          STEPNAME(emit_test8)
+#define emit_test8c         STEPNAME(emit_test8c)
 #define emit_test16         STEPNAME(emit_test16)
 #define emit_test32         STEPNAME(emit_test32)
 #define emit_test32c        STEPNAME(emit_test32c)
@@ -974,8 +975,9 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
-void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
+void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, uint8_t nextop, int s1, int s3, int s4, int s5);
 void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_test8c(dynarec_la64_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5);
 void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
-- 
cgit 1.4.1