| author | xctan <xctan@cirno.icu> | 2024-04-07 16:39:58 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-07 10:39:58 +0200 |
| commit | 1ca68a58fde0ac8b7389379f0862fb69a86ba2e4 (patch) | |
| tree | 5b0674adb4e623f66cc6253cfb7bc46cfd8d79d3 /src | |
| parent | b96139274fcb83be3e9085a1a06084364c938bc5 (diff) | |
| download | box64-1ca68a58fde0ac8b7389379f0862fb69a86ba2e4.tar.gz box64-1ca68a58fde0ac8b7389379f0862fb69a86ba2e4.zip | |
[RV64_DYNAREC] Fixed various bugs in shift instructions (#1426)
* [RV64_DYNAREC] Optimized 8-bit constant shifts
* [RV64_DYNAREC] Fixed shl8c when c > 8
* [RV64_DYNAREC] Optimized 16-bit constant shifts
* [RV64_DYNAREC] Optimized 8-bit CL shifts
* [RV64_DYNAREC] Fixed SF generation of 32-bit SHL Ed, CL
* [RV64_DYNAREC] Optimized 16-bit CL shifts
* [RV64_DYNAREC] Fixed typo in 8-bit CL SHL and SHR
* [RV64_DYNAREC] Fixed the wrong mask in 8-bit SHL Eb, CL
* [RV64_DYNAREC] Fixed typo in SAR Ew, CL
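For reference, the bugs listed above all revolve around x86 shift-flag semantics: the shift count is masked to 5 bits, an 8-bit SHL with a count above 8 must still zero the byte (the `shl8c` c > 8 fix), and OF is only defined for 1-bit shifts. The following is a minimal C model of `SHL r/m8, imm8` along those lines; `flags_t` and `shl8_ref` are names invented for this sketch, not box64 code:

```c
#include <stdint.h>

// Illustrative reference model of x86 "SHL r/m8, imm8" flag behavior.
typedef struct { int cf, of, zf, sf; } flags_t;

static uint8_t shl8_ref(uint8_t v, uint8_t count, flags_t* f) {
    unsigned c = count & 0x1f;      // hardware masks the count to 5 bits
    if (c == 0) return v;           // count 0: value and flags unchanged
    if (c > 8) {                    // every bit already shifted out
        f->cf = 0;                  // the emitter clears CF here; the
        f->zf = 1;                  // architectural value is undefined
        f->sf = 0;                  // for counts above the operand size
        return 0;
    }
    f->cf = (v >> (8 - c)) & 1;     // last bit shifted out
    uint8_t r = (uint8_t)(v << c);
    if (c == 1)                     // OF defined only for 1-bit shifts
        f->of = ((r >> 7) & 1) ^ f->cf;
    f->zf = (r == 0);
    f->sf = (r >> 7) & 1;
    return r;                       // PF/AF omitted for brevity
}
```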
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 143 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 109 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 634 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 24 |
4 files changed, 764 insertions, 146 deletions
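In the diff below, every CL-shift path gains a `BEQ_NEXT(x2, xZR)` early exit before `SETFLAGS`: an x86 shift whose masked count is zero must leave both the destination and all flags unchanged, so updating flags unconditionally was wrong. A minimal C model of that rule for `SHR r/m16, CL` follows; the function name is illustrative and `flags_t` is the invented struct from the sketch above:

```c
#include <stdint.h>

// Illustrative model of the zero-count rule for "SHR r/m16, CL".
static uint16_t shr16_cl_ref(uint16_t v, uint8_t cl, flags_t* f) {
    unsigned c = cl & 0x1f;            // masked count; may exceed 15
    if (c == 0) return v;              // zero count: *f must not be touched
    f->cf = (v >> (c - 1)) & 1;        // last bit shifted out (0 once c > 16)
    if (c == 1)
        f->of = (v >> 15) & 1;         // OF defined only for 1-bit shifts
    uint16_t r = (uint16_t)(v >> c);   // safe: v is promoted to int, c <= 31
    f->zf = (r == 0);
    f->sf = 0;                         // a logical shift clears the sign bit
    return r;                          // PF/AF omitted for brevity
}
```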
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index a3a6680e..df22e054 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -99,60 +99,44 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 4:
         case 6:
             INST_NAME("SHL Eb, Ib");
-            GETEB(x1, 1);
-            u8 = (F8)&0x1f;
-            if(u8) {
-                SETFLAGS(X_ALL, SF_PENDING);
-                UFLAG_IF{
-                    MOV32w(x4, u8); UFLAG_OP2(x4);
-                };
-                UFLAG_OP1(ed);
-                SLLIW(ed, ed, u8);
-                EBBACK(x5, 1);
-                UFLAG_RES(ed);
-                UFLAG_DF(x3, d_shl8);
+            u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
+            if (u8) {
+                SETFLAGS(X_ALL, SF_SET_PENDING);
+                GETEB(x1, 1);
+                u8 = (F8) & 0x1f;
+                emit_shl8c(dyn, ninst, ed, u8, x4, x5, x6);
+                EBBACK(x5, 0);
             } else {
-                NOP();
+                FAKEED;
+                F8;
             }
             break;
         case 5:
             INST_NAME("SHR Eb, Ib");
-            GETEB(x1, 1);
-            u8 = (F8)&0x1f;
-            if(u8) {
-                SETFLAGS(X_ALL, SF_PENDING);
-                UFLAG_IF{
-                    MOV32w(x4, u8); UFLAG_OP2(x4);
-                };
-                UFLAG_OP1(ed);
-                if(u8) {
-                    SRLIW(ed, ed, u8);
-                    EBBACK(x5, 1);
-                }
-                UFLAG_RES(ed);
-                UFLAG_DF(x3, d_shr8);
+            u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
+            if (u8) {
+                SETFLAGS(X_ALL, SF_SET_PENDING);
+                GETEB(x1, 1);
+                u8 = (F8) & 0x1f;
+                emit_shr8c(dyn, ninst, ed, u8, x4, x5, x6);
+                EBBACK(x5, 0);
             } else {
-                NOP();
+                FAKEED;
+                F8;
             }
             break;
         case 7:
             INST_NAME("SAR Eb, Ib");
-            GETSEB(x1, 1);
-            u8 = (F8)&0x1f;
-            if(u8) {
-                SETFLAGS(X_ALL, SF_PENDING);
-                UFLAG_IF{
-                    MOV32w(x4, u8); UFLAG_OP2(x4);
-                };
-                UFLAG_OP1(ed);
-                if(u8) {
-                    SRAIW(ed, ed, u8);
-                    EBBACK(x5, 1);
-                }
-                UFLAG_RES(ed);
-                UFLAG_DF(x3, d_sar8);
+            u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;
+            if (u8) {
+                SETFLAGS(X_ALL, SF_SET_PENDING);
+                GETSEB(x1, 1);
+                u8 = (F8) & 0x1f;
+                emit_sar8c(dyn, ninst, ed, u8, x4, x5, x6);
+                EBBACK(x5, 0);
             } else {
-                NOP();
+                FAKEED;
+                F8;
             }
             break;
         default:
             DEFAULT;
@@ -506,15 +490,12 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 INST_NAME("ROL Eb, CL");
                 ANDI(x2, xRCX, 7);
             }
-            SETFLAGS(X_OF|X_CF, SF_PENDING);
+            MESSAGE(LOG_DUMP, "Need Optimization\n");
+            READFLAGS(X_CF);
+            SETFLAGS(X_OF|X_CF, SF_SET);
             GETEB(x1, 0);
-            UFLAG_OP12(ed, x2);
-            SLL(x3, ed, x2);
-            SRLI(x4, x3, 8);
-            OR(ed, x3, x4);
-            EBBACK(x5, 1);
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_rol8);
+            CALL_(rol8, ed, x3);
+            EBBACK(x5, 0);
             break;
         case 1:
             if(opcode==0xD0) {
@@ -524,16 +505,12 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 INST_NAME("ROR Eb, CL");
                 ANDI(x2, xRCX, 7);
             }
-            SETFLAGS(X_OF|X_CF, SF_PENDING);
+            MESSAGE(LOG_DUMP, "Need Optimization\n");
+            READFLAGS(X_CF);
+            SETFLAGS(X_OF|X_CF, SF_SET);
             GETEB(x1, 0);
-            UFLAG_OP12(ed, x2);
-            SRL(x3, ed, x2);
-            SLLI(x4, ed, 8);
-            SRL(x4, x4, x2);
-            OR(ed, x3, x4);
-            EBBACK(x5, 1);
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_ror8);
+            CALL_(ror8, ed, x3);
+            EBBACK(x5, 0);
             break;
         case 2:
             if(opcode==0xD0) {
@@ -572,47 +549,47 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 MOV32w(x2, 1);
             } else {
                 INST_NAME("SHL Eb, CL");
-                ANDI(x2, xRCX, 7);
+                ANDI(x2, xRCX, 0x1f);
+                BEQ_NEXT(x2, xZR);
             }
-            SETFLAGS(X_ALL, SF_PENDING);
+            SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
             GETEB(x1, 0);
-            UFLAG_OP12(ed, x2)
-            SLL(ed, ed, x2);
-            EBBACK(x5, 1);
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_shl8);
+            emit_shl8(dyn, ninst, x1, x2, x5, x4, x6);
+            EBBACK(x5, 0);
             break;
         case 5:
             if(opcode==0xD0) {
                 INST_NAME("SHR Eb, 1");
-                MOV32w(x4, 1);
+                MOV32w(x2, 1);
             } else {
                 INST_NAME("SHR Eb, CL");
-                ANDI(x4, xRCX, 0x1F);
+                ANDI(x2, xRCX, 0x1F);
+                BEQ_NEXT(x2, xZR);
             }
-            SETFLAGS(X_ALL, SF_PENDING);
+            SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
             GETEB(x1, 0);
-            UFLAG_OP12(ed, x4);
-            SRLW(ed, ed, x4);
-            EBBACK(x5, 1);
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_shr8);
+            emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
+            EBBACK(x5, 0);
             break;
         case 7:
             if(opcode==0xD0) {
                 INST_NAME("SAR Eb, 1");
-                MOV32w(x4, 1);
+                MOV32w(x2, 1);
             } else {
                 INST_NAME("SAR Eb, CL");
-                ANDI(x4, xRCX, 0x1f);
+                ANDI(x2, xRCX, 0x1f);
+                BEQ_NEXT(x2, xZR);
             }
-            SETFLAGS(X_ALL, SF_PENDING);
+            SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
             GETSEB(x1, 0);
-            UFLAG_OP12(ed, x4)
-            SRA(ed, ed, x4);
-            EBBACK(x3, 1);
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_sar8);
+            emit_sar8(dyn, ninst, x1, x2, x5, x4, x6);
+            EBBACK(x5, 0);
             break;
         default:
             DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index a17aae3b..edce9940 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -985,50 +985,42 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 4:
         case 6:
             INST_NAME("SHL Ew, Ib");
-            UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-            SETFLAGS(X_ALL, SF_PENDING);
-            GETEW(x1, 1);
-            u8 = F8;
-            UFLAG_IF {MOV32w(x2, (u8&15));}
-            UFLAG_OP12(ed, x2)
-            if(MODREG) {
-                SLLI(ed, ed, 48+(u8&15));
-                SRLI(ed, ed, 48);
+            if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                GETEW(x1, 0);
+                u8 = (F8)&0x1f;
+                emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6);
+                EWBACK;
             } else {
-                SLLI(ed, ed, u8&15);
+                FAKEED;
+                F8;
             }
-            EWBACK;
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_shl16);
             break;
         case 5:
             INST_NAME("SHR Ew, Ib");
-            UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-            SETFLAGS(X_ALL, SF_PENDING);
-            GETEW(x1, 1);
-            u8 = F8;
-            UFLAG_IF {MOV32w(x2, (u8&15));}
-            UFLAG_OP12(ed, x2)
-            SRLI(ed, ed, u8&15);
-            EWBACK;
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_shr16);
+            if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                GETEW(x1, 0);
+                u8 = (F8)&0x1f;
+                emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6);
+                EWBACK;
+            } else {
+                FAKEED;
+                F8;
+            }
             break;
         case 7:
             INST_NAME("SAR Ew, Ib");
-            SETFLAGS(X_ALL, SF_PENDING);
-            UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-            GETSEW(x1, 1);
-            u8 = F8;
-            UFLAG_IF {MOV32w(x2, (u8&15));}
-            UFLAG_OP12(ed, x2)
-            SRAI(ed, ed, u8&15);
-            if(MODREG) {
-                ZEXTH(ed, ed);
+            if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {
+                SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                GETSEW(x1, 0);
+                u8 = (F8)&0x1f;
+                emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6);
+                EWBACK;
+            } else {
+                FAKEED;
+                F8;
             }
-            EWBACK;
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_sar16);
             break;
         }
         break;
@@ -1115,56 +1107,51 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 5:
             if(opcode==0xD1) {
                 INST_NAME("SHR Ew, 1");
-                MOV32w(x4, 1);
+                MOV32w(x2, 1);
             } else {
                 INST_NAME("SHR Ew, CL");
-                ANDI(x4, xRCX, 15);
+                ANDI(x2, xRCX, 0x1f);
+                BEQ_NEXT(x2, xZR);
             }
-            UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-            SETFLAGS(X_ALL, SF_PENDING);
+            SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
             GETEW(x1, 0);
-            UFLAG_OP12(ed, x4)
-            SRL(ed, ed, x4);
+            emit_shr16(dyn, ninst, x1, x2, x5, x4, x6);
             EWBACK;
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_shr16);
             break;
         case 4:
        case 6:
            if(opcode==0xD1) {
                 INST_NAME("SHL Ew, 1");
-                MOV32w(x4, 1);
+                MOV32w(x2, 1);
             } else {
                 INST_NAME("SHL Ew, CL");
-                ANDI(x4, xRCX, 15);
+                ANDI(x2, xRCX, 0x1f);
+                BEQ_NEXT(x2, xZR);
             }
-            UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-            SETFLAGS(X_ALL, SF_PENDING);
+            SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
             GETEW(x1, 0);
-            UFLAG_OP12(ed, x4)
-            SLL(ed, ed, x4);
-            ZEXTH(ed, ed);
+            emit_shl16(dyn, ninst, x1, x2, x5, x4, x6);
             EWBACK;
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_shl16);
             break;
         case 7:
             if(opcode==0xD1) {
                 INST_NAME("SAR Ew, 1");
-                MOV32w(x4, 1);
+                MOV32w(x2, 1);
             } else {
                 INST_NAME("SAR Ew, CL");
-                ANDI(x4, xRCX, 15);
+                ANDI(x2, xRCX, 0x1f);
+                BEQ_NEXT(x2, xZR);
             }
-            UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
-            SETFLAGS(X_ALL, SF_PENDING);
+            SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+            if(box64_dynarec_safeflags>1)
+                MAYSETFLAGS();
             GETSEW(x1, 0);
-            UFLAG_OP12(ed, x4);
-            SRA(ed, ed, x4);
-            ZEXTH(ed, ed);
+            emit_sar16(dyn, ninst, x1, x2, x5, x4, x6);
             EWBACK;
-            UFLAG_RES(ed);
-            UFLAG_DF(x3, d_sar16);
             break;
         default:
             DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 69e6a08f..a4bd4e19 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -21,6 +21,636 @@
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
 
+// emit SHL8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SB(s3, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shl8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (c < 8) {
+        IFX(X_CF|X_OF) {
+            SRLI(s3, s1, 8-c);
+            ANDI(s5, s3, 1); // LSB == F_CF
+            IFX(X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
+        }
+
+        SLLI(s1, s1, c+56);
+        IFX(X_SF) {
+            BGE(s1, xZR, 8);
+            ORI(xFlags, xFlags, 1 << F_SF);
+        }
+        SRLI(s1, s1, 56);
+
+        IFX(X_PEND) {
+            SB(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        IFX(X_ZF) {
+            BNEZ(s1, 8);
+            ORI(xFlags, xFlags, 1 << F_ZF);
+        }
+        IFX(X_OF) {
+            // OF flag is affected only on 1-bit shifts
+            if (c == 1) {
+                SRLI(s3, s1, 7);
+                XOR(s3, s3, s5);
+                SLLI(s3, s3, F_OF2);
+                OR(xFlags, xFlags, s3);
+            }
+        }
+        IFX(X_PF) {
+            emit_pf(dyn, ninst, s1, s3, s4);
+        }
+    } else {
+        IFX(X_CF) {
+            if (c == 8) {
+                ANDI(s3, s1, 1);
+                OR(xFlags, xFlags, s3); // F_CF == 0
+            }
+        }
+        MV(s1, xZR);
+        // OF nop
+        // SF nop
+        // AF nop
+        IFX(X_PF | X_ZF) {
+            IFX(X_ZF) {
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            IFX(X_PF) {
+                ORI(xFlags, xFlags, 1 << F_PF);
+            }
+        }
+    }
+}
+
+// emit SHR8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SB(s3, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI(s3, s1, 7);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+
+    SRLI(s1, s1, c);
+    ANDI(s1, s1, 0xff);
+
+    // SF should be unset
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SB(s3, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRLI(s1, s1, c);
+    ANDI(s1, s1, 0xff);
+
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHL8 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    // s2 is not 0 here and is 1..1f/3f
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    SLL(s1, s1, s2);
+
+    IFX(X_CF | X_OF) {
+        SRLI(s5, s1, 8);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX(X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI(s1, s1, 56);
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI(s1, s1, 56);
+
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI(s3, s1, 7);
+        XOR(s3, s3, s5);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
+void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI(s3, xZR, 1);
+        BEQ(s2, s3, 4+3*4);
+        SRLI(s3, s1, 7);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+
+    SRL(s1, s1, s2);
+    ANDI(s1, s1, 0xff);
+
+    // SF should be unset
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3, s4 and s5 as scratch
+void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
+        SB(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRL(s1, s1, s2);
+    ANDI(s1, s1, 0xff);
+
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHL16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SH(s3, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shl16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (c < 16) {
+        IFX(X_CF|X_OF) {
+            SRLI(s3, s1, 16-c);
+            ANDI(s5, s3, 1); // LSB == F_CF
+            IFX(X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
+        }
+
+        SLLI(s1, s1, c+48);
+        IFX(X_SF) {
+            BGE(s1, xZR, 8);
+            ORI(xFlags, xFlags, 1 << F_SF);
+        }
+        SRLI(s1, s1, 48);
+
+        IFX(X_PEND) {
+            SH(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        IFX(X_ZF) {
+            BNEZ(s1, 8);
+            ORI(xFlags, xFlags, 1 << F_ZF);
+        }
+        IFX(X_OF) {
+            // OF flag is affected only on 1-bit shifts
+            if (c == 1) {
+                SRLI(s3, s1, 15);
+                XOR(s3, s3, s5);
+                SLLI(s3, s3, F_OF2);
+                OR(xFlags, xFlags, s3);
+            }
+        }
+        IFX(X_PF) {
+            emit_pf(dyn, ninst, s1, s3, s4);
+        }
+    } else {
+        IFX(X_CF) {
+            if (c == 16) {
+                ANDI(s3, s1, 1);
+                OR(xFlags, xFlags, s3); // F_CF == 0
+            }
+        }
+        MV(s1, xZR);
+        // OF nop
+        // SF nop
+        // AF nop
+        IFX(X_PF | X_ZF) {
+            IFX(X_ZF) {
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            IFX(X_PF) {
+                ORI(xFlags, xFlags, 1 << F_PF);
+            }
+        }
+    }
+}
+
+// emit SHR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SH(s3, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI(s3, s1, 15);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+
+    SRLI(s1, s1, c);
+    // SF should be unset
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
+void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return;
+    // c != 0
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        MOV64x(s3, c);
+        SH(s3, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB == F_CF
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1); // LSB == F_CF
+        }
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        if (c == 1) {
+            SRLI(s3, s1, 15);
+            ANDI(s3, s3, 1);
+            SLLI(s3, s3, F_OF2);
+            OR(xFlags, xFlags, s3);
+        }
+    }
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRLI(s1, s1, c);
+    ZEXTH(s1, s1);
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+
+// emit SHL16 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    // s2 is not 0 here and is 1..1f/3f
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    SLL(s1, s1, s2);
+
+    IFX(X_CF | X_OF) {
+        SRLI(s5, s1, 16);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX(X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI(s1, s1, 48);
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI(s1, s1, 48);
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI(s3, s1, 15);
+        XOR(s3, s3, s5);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR16 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
+void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_shr16);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI(s3, xZR, 1);
+        BEQ(s2, s3, 4+3*4);
+        SRLI(s3, s1, 15);
+        SLLI(s3, s3, F_OF2);
+        OR(xFlags, xFlags, s3);
+    }
+
+    SRL(s1, s1, s2);
+    ZEXTH(s1, s1);
+
+    // SF should be unset
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SAR16 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3, s4 and s5 as scratch
+void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+
+    IFX(X_PEND) {
+        SH(s2, xEmu, offsetof(x64emu_t, op2));
+        SH(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, d_sar8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    // For the SAR instruction, the OF flag is cleared for all 1-bit shifts.
+    // OF nop
+    IFX(X_SF) {
+        // SF is the same as the original operand
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+
+    SRL(s1, s1, s2);
+    ZEXTH(s1, s1);
+
+    IFX(X_PEND) {
+        SH(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
 void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
@@ -44,7 +674,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         }
     }
 
-    SLL(s1, s1, s2);
+    SLLxw(s1, s1, s2);
 
     IFX(X_SF) {
         BGE(s1, xZR, 8);
@@ -155,7 +785,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     // OF flag is affected only on 1-bit shifts
     // OF flag is set to the most-significant bit of the original operand
     ADDI(s3, xZR, 1);
-    BEQ(s2, s3, 4+4*4);
+    BEQ(s2, s3, 4+3*4);
     SRLIxw(s3, s1, rex.w?63:31);
     SLLI(s3, s3, F_OF2);
     OR(xFlags, xFlags, s3);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 1f2f43c5..3292ea2f 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1158,6 +1158,18 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_neg32 STEPNAME(emit_neg32)
 #define emit_neg16 STEPNAME(emit_neg16)
 #define emit_neg8 STEPNAME(emit_neg8)
+#define emit_shl8c STEPNAME(emit_shl8c)
+#define emit_shr8c STEPNAME(emit_shr8c)
+#define emit_sar8c STEPNAME(emit_sar8c)
+#define emit_shl8 STEPNAME(emit_shl8)
+#define emit_shr8 STEPNAME(emit_shr8)
+#define emit_sar8 STEPNAME(emit_sar8)
+#define emit_shl16c STEPNAME(emit_shl16c)
+#define emit_shr16c STEPNAME(emit_shr16c)
+#define emit_sar16c STEPNAME(emit_sar16c)
+#define emit_shl16 STEPNAME(emit_shl16)
+#define emit_shr16 STEPNAME(emit_shr16)
+#define emit_sar16 STEPNAME(emit_sar16)
 #define emit_shl32 STEPNAME(emit_shl32)
 #define emit_shl32c STEPNAME(emit_shl32c)
 #define emit_shr32 STEPNAME(emit_shr32)
@@ -1297,6 +1309,18 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
```