diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-02-21 18:34:44 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-21 11:34:44 +0100 |
| commit | b179bd24154f890fe28e44c94dd667ea99ccbf45 (patch) | |
| tree | df31e9a0d437444f866a259c49318547020ec921 | |
| parent | 64fbd3db46beff7091ede54fa96f245d324c6c9a (diff) | |
| download | box64-b179bd24154f890fe28e44c94dd667ea99ccbf45.tar.gz box64-b179bd24154f890fe28e44c94dd667ea99ccbf45.zip | |
[DYNAREC_RV64] Minor OF flag fixes and optimizations for emit shift utils (#1275)
* Minor OF flag fixes and optimizations for emit shift utils
* original operand
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 201 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 11 |
3 files changed, 118 insertions, 100 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 3cd3e92f..f048936c 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1211,7 +1211,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(1); GETGD; u8 = F8; - emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5); + emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4); WBACK; break; case 0xA5: @@ -1225,7 +1225,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); } ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); BEQ_NEXT(x3, xZR); - emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5); + emit_shld32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6); WBACK; break; case 0xAB: @@ -1280,7 +1280,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!rex.w && !rex.is32bits && MODREG) { ZEROUP(ed); } ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); BEQ_NEXT(x3, xZR); - emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4); + emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4, x6); WBACK; break; case 0xAE: diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 23bf1097..9008ca19 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -38,7 +38,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SUBI(s5, s2, rex.w?64:32); NEG(s5, s5); SRL(s3, s1, s5); - ANDI(s5, s3, 1); // F_CF + ANDI(s5, s3, 1); // LSB == F_CF IFX(X_CF) { OR(xFlags, xFlags, s5); } @@ -61,10 +61,13 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + ADDI(s3, s2, -1); + BNEZ(s3, 4 + 4 * 4); SRLIxw(s3, s1, rex.w?63:31); XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + SLLI(s3, s3, F_OF2); + 
OR(xFlags, xFlags, s3); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -94,12 +97,10 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_CF|X_OF) { if (c > 0) { SRLI(s3, s1, (rex.w?64:32)-c); - ANDI(s5, s3, 1); // F_CF + ANDI(s5, s3, 1); // LSB == F_CF IFX(X_CF) { OR(xFlags, xFlags, s5); } - } else { - IFX(X_OF) MOV64x(s5, 0); } } @@ -120,10 +121,13 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { - SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // OF flag is affected only on 1-bit shifts + if (c == 1) { + SRLIxw(s3, s1, rex.w?63:31); + XOR(s3, s3, s5); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -151,6 +155,15 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, s3, 1); // LSB == F_CF OR(xFlags, xFlags, s3); } + IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + ADDI(s3, xZR, 1); + BEQ(s2, s3, 4+4*4); + SRLIxw(s3, s1, rex.w?63:31); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } SRL(s1, s1, s2); @@ -168,16 +181,6 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_OF) { - ADDI(s3, xZR, 1); - BEQ(s2, s3, 4+6*4); - SRLI(s3, s1, rex.w?62:30); - SRLI(s4, s1, rex.w?63:31); - XOR(s3, s3, s4); - ANDI(s3, s3, 1); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); - } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } @@ -214,6 +217,15 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } OR(xFlags, xFlags, s3); } + IFX(X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + if(c==1) { + SRLIxw(s3, s1, rex.w?63:31); + SLLI(s3, s3, F_OF2); + 
OR(xFlags, xFlags, s3); + } + } SRLIxw(s1, s1, c); @@ -231,16 +243,6 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_OF) { - if(c==1) { - SRLI(s3, s1, rex.w?62:30); - SRLI(s4, s1, rex.w?63:31); - XOR(s3, s3, s4); - ANDI(s3, s3, 1); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); - } - } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } @@ -332,16 +334,16 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_CF) { - ANDI(s4, s1, 1<<F_CF); - OR(xFlags, xFlags, s4); + IFX(X_CF | X_OF) { + ANDI(s4, s1, 1); // LSB == F_CF + IFX(X_CF) OR(xFlags, xFlags, s4); } IFX(X_OF) { + // the OF flag is set to the exclusive OR of the CF bit (after the rotate) and the most-significant bit of the result. ADDI(s3, xZR, 1); - BEQ_NEXT(s2, s3); + BNE_NEXT(s2, s3); SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s1); - ANDI(s3, s3, 1); + XOR(s3, s3, s4); // s3: MSB, s4: CF bit SLLI(s3, s3, F_OF2); OR(xFlags, xFlags, s3); } @@ -384,10 +386,12 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s OR(xFlags, xFlags, s3); } IFX(X_OF) { + // the OF flag is set to the exclusive OR of the two most-significant bits of the result ADDI(s3, xZR, 1); - BEQ_NEXT(s2, s3); + BNE_NEXT(s2, s3); SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s1); + SRLIxw(s4, s1, rex.w?62:30); + XOR(s3, s3, s4); ANDI(s3, s3, 1); SLLI(s3, s3, F_OF2); OR(xFlags, xFlags, s3); @@ -426,15 +430,15 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_CF) { + IFX(X_CF | X_OF) { ANDI(s4, s1, 1<<F_CF); - OR(xFlags, xFlags, s4); + IFX(X_CF) OR(xFlags, xFlags, s4); } IFX(X_OF) { + // the OF flag is set to the exclusive OR of the CF bit (after the rotate) and the most-significant bit of the result. 
if(c==1) { SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s1); - ANDI(s3, s3, 1); + XOR(s3, s3, s4); SLLI(s3, s3, F_OF2); OR(xFlags, xFlags, s3); } @@ -478,6 +482,7 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, OR(xFlags, xFlags, s3); } IFX(X_OF) { + // the OF flag is set to the exclusive OR of the two most-significant bits of the result if(c==1) { SRLI(s3, s1, rex.w?62:30); SRLI(s4, s3, 1); @@ -520,6 +525,10 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } OR(xFlags, xFlags, s3); } + IFX(X_OF) { + // Store sign for later use. + if (c == 1) SRLIxw(s4, s1, rex.w?63:31); + } SRLIxw(s3, s1, c); SLLIxw(s1, s2, (rex.w?64:32)-c); @@ -540,13 +549,12 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { + // the OF flag is set if a sign change occurred if(c==1) { - SRLI(s3, s1, rex.w?62:30); - SRLI(s4, s1, rex.w?63:31); + SRLI(s3, s1, rex.w?63:31); XOR(s3, s3, s4); - ANDI(s3, s3, 1); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); } } IFX(X_PF) { @@ -585,6 +593,10 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } OR(xFlags, xFlags, s3); } + IFX(X_OF) { + // Store sign for later use. 
+ if (c == 1) SRLI(s4, s1, 15); + } SRLIxw(s3, s1, c); SLLIxw(s1, s2, 16-c); @@ -592,8 +604,8 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ZEXTH(s1, s1); IFX(X_SF) { - SLLIW(s4, s1, 16); - BGE(s4, xZR, 8); + SLLIW(s3, s1, 16); + BGE(s3, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } IFX(X_PEND) { @@ -604,13 +616,13 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { + // the OF flag is set if a sign change occurred if(c==1) { - SRLI(s3, s1, 14); - SRLI(s4, s1, 15); + SRLI(s3, s1, 15); XOR(s3, s3, s4); ANDI(s3, s3, 1); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); } } IFX(X_PF) { @@ -618,7 +630,7 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } } -void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) +void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4) { c&=(rex.w?0x3f:0x1f); CLEAR_FLAGS(); @@ -639,17 +651,17 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } return; } - IFX(X_CF|X_OF) { + IFX(X_CF) { if (c > 0) { SRLI(s3, s1, (rex.w?64:32)-c); - ANDI(s5, s3, 1); // F_CF - IFX(X_CF) { - OR(xFlags, xFlags, s5); - } - } else { - IFX(X_OF) MOV64x(s5, 0); + ANDI(s4, s3, 1); // F_CF + OR(xFlags, xFlags, s4); } } + IFX(X_OF) { + // Store sign for later use. 
+ if (c == 1) SRLIxw(s4, s1, rex.w?63:31); + } SLLIxw(s3, s1, c); SRLIxw(s1, s2, (rex.w?64:32)-c); @@ -670,10 +682,13 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { - SRLIxw(s3, s1, rex.w?63:31); - XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // the OF flag is set if a sign change occurred + if (c == 1) { + SRLIxw(s3, s1, rex.w?63:31); + XOR(s3, s3, s4); + SLLI(s3, s3, F_OF2); + ORI(xFlags, xFlags, s3); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -681,7 +696,7 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } -void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4) +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6) { int64_t j64; CLEAR_FLAGS(); @@ -699,9 +714,8 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int OR(xFlags, xFlags, s3); } IFX(X_OF) { - SRLxw(s4, s1, rex.w ? 63 : 31); - BEQZ(s4, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // Store current sign for later use. + SRLxw(s6, s1, rex.w ? 63 : 31); } ADDI(s4, xZR, (rex.w ? 
64 : 32)); SUB(s4, s4, s5); @@ -727,11 +741,9 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int ADDI(s5, s5, -1); BNEZ_MARK(s5); SRLIxw(s3, s1, rex.w?63:31); - BEXTI(s4, xFlags, F_OF2); - XOR(s3, s3, s4); - ANDI(xFlags, xFlags, ~(1<<F_OF2)); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + XOR(s3, s3, s6); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); MARK; } IFX(X_PF) { @@ -739,7 +751,7 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int } } -void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4) +void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6) { int64_t j64; CLEAR_FLAGS(); @@ -758,9 +770,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int OR(xFlags, xFlags, s4); } IFX(X_OF) { - SRLxw(s4, s1, rex.w?63:31); - BEQZ(s4, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // Store current sign for later use. + SRLxw(s6, s1, rex.w ? 63 : 31); } SLLxw(s4, s1, s5); SRLxw(s3, s2, s3); @@ -784,11 +795,9 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int ADDI(s5, s5, -1); BNEZ_MARK(s5); SRLIxw(s3, s1, rex.w?63:31); - BEXTI(s4, xFlags, F_OF2); - XOR(s3, s3, s4); - ANDI(xFlags, xFlags, ~(1<<F_OF2)); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + XOR(s3, s3, s6); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); MARK; } IFX(X_PF) { @@ -817,17 +826,17 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } return; } - IFX(X_CF|X_OF) { + IFX(X_CF) { if (c > 0) { SRLI(s3, s1, 16-c); - ANDI(s5, s3, 1); // F_CF - IFX(X_CF) { - OR(xFlags, xFlags, s5); - } - } else { - IFX(X_OF) MOV64x(s5, 0); + ANDI(s5, s3, 1); // LSB == F_CF + OR(xFlags, xFlags, s5); } } + IFX(X_OF) { + // Store sign for later use. 
+ if (c == 1) SRLI(s5, s1, 15); + } SLLIxw(s3, s1, c); SRLIxw(s1, s2, 16-c); @@ -847,10 +856,14 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { - SRLIxw(s3, s1, 15); - XOR(s3, s3, s5); - BEQZ(s3, 8); - ORI(xFlags, xFlags, 1 << F_OF2); + // the OF flag is set if a sign change occurred + if(c==1) { + SRLI(s3, s1, 15); + XOR(s3, s3, s5); + ANDI(s3, s3, 1); + SLLI(s3, s3, F_OF2); + OR(xFlags, xFlags, s3); + } } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index e1048260..0a1dcfa6 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -734,6 +734,11 @@ j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ BEQ(reg1, reg2, j64) +// Branch to NEXT if reg1!=reg2 (use j64) +#define BNE_NEXT(reg1, reg2) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ + BNE(reg1, reg2, j64) + // Branch to NEXT if reg1==0 (use j64) #define CBZ_NEXT(reg1) \ j64 = (dyn->insts) ? 
(dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ @@ -1302,9 +1307,9 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); -void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5); -void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4); -void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4); +void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); +void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6); +void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6); void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5); |