From b179bd24154f890fe28e44c94dd667ea99ccbf45 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Wed, 21 Feb 2024 18:34:44 +0800 Subject: [DYNAREC_RV64] Minor OF flag fixes and optimizations for emit shift utils (#1275) * Minor OF flag fixes and optimizations for emit shift utils * original operand --- src/dynarec/rv64/dynarec_rv64_0f.c | 6 +- src/dynarec/rv64/dynarec_rv64_emit_shift.c | 201 +++++++++++++++-------------- src/dynarec/rv64/dynarec_rv64_helper.h | 11 +- 3 files changed, 118 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 3cd3e92f..f048936c 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1211,7 +1211,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(1); GETGD; u8 = F8; - emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5); + emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4); WBACK; break; case 0xA5: @@ -1225,7 +1225,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); } ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); BEQ_NEXT(x3, xZR); - emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5); + emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6); WBACK; break; case 0xAB: @@ -1280,7 +1280,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); } ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); BEQ_NEXT(x3, xZR); - emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4); + emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4, x6); WBACK; break; case 0xAE: diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 23bf1097..9008ca19 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -38,7 +38,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SUBI(s5, s2, rex.w?64:32); NEG(s5, s5); SRL(s3, s1, s5); - ANDI(s5, s3, 1); // F_CF + ANDI(s5, s3, 1); // LSB == F_CF IFX(X_CF) { OR(xFlags, xFlags, s5); } @@ -61,10 +61,13 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + ADDI(s3, s2, -1); + BNEZ(s3, 4 + 4 * 4); SRLIxw(s3, s1, rex.w?63:31); XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -94,12 +97,10 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_CF|X_OF) { if (c > 0) { SRLI(s3, s1, (rex.w?64:32)-c); - ANDI(s5, s3, 1); // F_CF + ANDI(s5, s3, 1); // LSB == F_CF IFX(X_CF) { OR(xFlags, xFlags, s5); } - } else { - IFX(X_OF) MOV64x(s5, 0); } } @@ -120,10 +121,13 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { - SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // OF flag is affected only on 1-bit shifts + if (c == 1) { + SRLIxw(s3, s1, rex.w?63:31); + XOR(s3, s3, s5); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -151,6 +155,15 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, s3, 1); // LSB == F_CF OR(xFlags, xFlags, s3); } + IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + ADDI(s3, xZR, 1); + BEQ(s2, s3, 4+4*4); + SRLIxw(s3, s1, rex.w?63:31); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } SRL(s1, s1, s2); @@ -168,16 +181,6 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_OF) { - ADDI(s3, xZR, 1); - BEQ(s2, s3, 4+6*4); - SRLI(s3, s1, rex.w?62:30); - SRLI(s4, s1, rex.w?63:31); - XOR(s3, s3, s4); - ANDI(s3, s3, 1); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); - } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } @@ -214,6 +217,15 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } OR(xFlags, xFlags, s3); } + IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + if(c==1) { + SRLIxw(s3, s1, rex.w?63:31); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } + } SRLIxw(s1, s1, c); @@ -231,16 +243,6 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_OF) { - if(c==1) { - SRLI(s3, s1, rex.w?62:30); - SRLI(s4, s1, rex.w?63:31); - XOR(s3, s3, s4); - ANDI(s3, s3, 1); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); - } - } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } @@ -332,16 +334,16 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_CF) { - ANDI(s4, s1, 1< 0) { SRLI(s3, s1, (rex.w?64:32)-c); - ANDI(s5, s3, 1); // F_CF - IFX(X_CF) { - OR(xFlags, xFlags, s5); - } - } else { - IFX(X_OF) MOV64x(s5, 0); + ANDI(s4, s3, 1); // F_CF + OR(xFlags, xFlags, s4); } } + IFX(X_OF) { + // Store sign for later use. + if (c == 1) SRLIxw(s4, s1, rex.w?63:31); + } SLLIxw(s3, s1, c); SRLIxw(s1, s2, (rex.w?64:32)-c); @@ -670,10 +682,13 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { - SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // the OF flag is set if a sign change occurred + if (c == 1) { + SRLIxw(s3, s1, rex.w?63:31); + XOR(s3, s3, s4); + SLLI(s3, s3, F_OF2); + ORI(xFlags, xFlags, s3); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -681,7 +696,7 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } -void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4) +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6) { int64_t j64; CLEAR_FLAGS(); @@ -699,9 +714,8 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int OR(xFlags, xFlags, s3); } IFX(X_OF) { - SRLxw(s4, s1, rex.w ? 63 : 31); - BEQZ(s4, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // Store current sign for later use. + SRLxw(s6, s1, rex.w ? 63 : 31); } ADDI(s4, xZR, (rex.w ? 64 : 32)); SUB(s4, s4, s5); @@ -727,11 +741,9 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int ADDI(s5, s5, -1); BNEZ_MARK(s5); SRLIxw(s3, s1, rex.w?63:31); - BEXTI(s4, xFlags, F_OF2); - XOR(s3, s3, s4); - ANDI(xFlags, xFlags, ~(1< 0) { SRLI(s3, s1, 16-c); - ANDI(s5, s3, 1); // F_CF - IFX(X_CF) { - OR(xFlags, xFlags, s5); - } - } else { - IFX(X_OF) MOV64x(s5, 0); + ANDI(s5, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s5); } } + IFX(X_OF) { + // Store sign for later use. + if (c == 1) SRLI(s5, s1, 15); + } SLLIxw(s3, s1, c); SRLIxw(s1, s2, 16-c); @@ -847,10 +856,14 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { - SRLIxw(s3, s1, 15); - XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // the OF flag is set if a sign change occurred + if(c==1) { + SRLI(s3, s1, 15); + XOR(s3, s3, s5); + ANDI(s3, s3, 1); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index e1048260..0a1dcfa6 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -734,6 +734,11 @@ j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ BEQ(reg1, reg2, j64) +// Branch to NEXT if reg1!=reg2 (use j64) +#define BNE_NEXT(reg1, reg2) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ + BNE(reg1, reg2, j64) + // Branch to NEXT if reg1==0 (use j64) #define CBZ_NEXT(reg1) \ j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ @@ -1302,9 +1307,9 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); -void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5); -void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4); -void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4); +void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6); +void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6); void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5); -- cgit 1.4.1