diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-12-23 17:05:05 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-23 10:05:05 +0100 |
| commit | cad9450e16a0c145d49a66b275b091db4e8d1308 (patch) | |
| tree | e724ff681abcaf7e81b043a5e27de60d91719494 /src | |
| parent | 0c7cc657cc3ef0b11330f51e96ca851393acbcec (diff) | |
| download | box64-cad9450e16a0c145d49a66b275b091db4e8d1308.tar.gz box64-cad9450e16a0c145d49a66b275b091db4e8d1308.zip | |
[LA64_DYNAREC] Optimized some 16bit shift opcodes (#2192)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_66.c | 61 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_emit_shift.c | 227 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 6 |
3 files changed, 261 insertions, 33 deletions
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index d5482728..ef8f5c8d 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -650,48 +650,43 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 4: case 6: INST_NAME("SHL Ew, Ib"); - UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); } - SETFLAGS(X_ALL, SF_PENDING); - GETEW(x1, 1); - u8 = F8; - UFLAG_IF { MOV32w(x2, (u8 & 15)); } - UFLAG_OP12(ed, x2) - if (MODREG) { - SLLI_D(ed, ed, 48 + (u8 & 15)); - SRLI_D(ed, ed, 48); + if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETEW(x1, 0); + u8 = (F8) & 0x1f; + emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6); + EWBACK; } else { - SLLI_D(ed, ed, u8 & 15); + FAKEED; + F8; } - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shl16); break; case 5: INST_NAME("SHR Ew, Ib"); - UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); } - SETFLAGS(X_ALL, SF_PENDING); - GETEW(x1, 1); - u8 = F8; - UFLAG_IF { MOV32w(x2, (u8 & 15)); } - UFLAG_OP12(ed, x2) - SRLI_D(ed, ed, u8 & 15); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_shr16); + if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETEW(x1, 0); + u8 = (F8) & 0x1f; + emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6); + EWBACK; + } else { + FAKEED; + F8; + } break; case 7: INST_NAME("SAR Ew, Ib"); SETFLAGS(X_ALL, SF_PENDING); - UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); } - GETSEW(x1, 1); - u8 = F8; - UFLAG_IF { MOV32w(x2, (u8 & 15)); } - UFLAG_OP12(ed, x2) - SRAI_D(ed, ed, u8 & 15); - if (MODREG) BSTRPICK_D(ed, ed, 15, 0); - EWBACK; - UFLAG_RES(ed); - UFLAG_DF(x3, d_sar16); + if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { + SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + GETSEW(x1, 0); + u8 = (F8) & 0x1f; + emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6); + EWBACK; + } else { + FAKEED; + F8; + } break; default: DEFAULT; diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index 9d891bdd..a032be16 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -86,6 +86,101 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } } +// emit SHL16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch +void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5) +{ + if (!c) return; + // c != 0 + + IFX (X_PEND) { + MOV64x(s3, c); + ST_H(s3, xEmu, offsetof(x64emu_t, op2)); + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, d_shl16); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX (X_PEND) { + } else { + MOV64x(s3, c); + } + IFX (X_ALL) { + X64_SLL_H(s1, s3); + } + + SLLI_D(s1, s1, c); + BSTRPICK_D(s1, s1, 15, 0); + + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + CLEAR_FLAGS(s3); + if (c < 16) { + IFX (X_CF | X_OF) { + SRLI_D(s3, s1, 16 - c); + ANDI(s5, s3, 1); // LSB == F_CF + IFX (X_CF) { + OR(xFlags, xFlags, s5); + } + } + + SLLI_D(s1, s1, c + 48); + IFX (X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + SRLI_D(s1, s1, 48); + + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_OF) { + // OF flag is affected only on 1-bit shifts + if (c == 1) { + SRLI_D(s3, s1, 15); + XOR(s3, s3, s5); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } + } else { + IFX (X_CF) { + if (c == 16) { + ANDI(s3, s1, 1); + OR(xFlags, xFlags, s3); // F_CF == 0 + } + } + MV(s1, xZR); + + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + // OF nop + // SF nop + // AF nop + IFX (X_PF | X_ZF) { + IFX (X_ZF) { + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + ORI(xFlags, xFlags, 1 << F_PF); + } + } + } +} + // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { @@ -354,6 +449,72 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } } +// emit SHR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch +void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5) +{ + if (!c) return; + // c != 0 + + IFX (X_PEND) { + MOV64x(s3, c); + ST_H(s3, xEmu, offsetof(x64emu_t, op2)); + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, d_shr16); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX (X_PEND) { + } else { + MOV64x(s3, c); + } + IFX (X_ALL) { + X64_SRL_H(s1, s3); + } + SRLI_D(s1, s1, c); + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + CLEAR_FLAGS(s3); + IFX (X_CF) { + if (c > 1) { + SRAI_D(s3, s1, c - 1); + ANDI(s3, s3, 1); // LSB == F_CF + } else { + // no need to shift + ANDI(s3, s1, 1); // LSB == F_CF + } + OR(xFlags, xFlags, s3); + } + IFX (X_OF) { + // OF flag is affected only on 1-bit shifts + // OF flag is set to the most-significant bit of the original operand + if (c == 1) { + SRLI_D(s3, s1, 15); + SLLI_D(s3, s3, F_OF); + OR(xFlags, xFlags, s3); + } + } + + SRLI_D(s1, s1, c); + + // SF should be unset + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) { @@ -563,6 +724,72 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } } + +// emit SAR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch +void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5) +{ + if (!c) return; + // c != 0 + + IFX (X_PEND) { + MOV64x(s3, c); + ST_H(s3, xEmu, offsetof(x64emu_t, op2)); + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s4, d_sar16); + } else IFX (X_ALL) { + SET_DFNONE(); + } + + if (la64_lbt) { + IFX (X_PEND) { + } else { + MOV64x(s3, c); + } + IFX (X_ALL) { + X64_SRA_H(s1, s3); + } + SRLI_D(s1, s1, c); + BSTRPICK_D(s1, s1, 15, 0); + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + + CLEAR_FLAGS(s3); + IFX (X_CF) { + if (c > 1) { + SRAI_D(s3, s1, c - 1); + ANDI(s3, s3, 1); // LSB == F_CF + } else { + // no need to shift + ANDI(s3, s1, 1); // LSB == F_CF + } + OR(xFlags, xFlags, s3); + } + // For the SAR instruction, the OF flag is cleared for all 1-bit shifts. + // OF nop + IFX (X_SF) { + // SF is the same as the original operand + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + + SRLI_D(s1, s1, c); + BSTRPICK_D(s1, s1, 15, 0); + + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index b4b33443..630f3873 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -862,13 +862,16 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_and32 STEPNAME(emit_and32) #define emit_and32c STEPNAME(emit_and32c) #define emit_shl16 STEPNAME(emit_shl16) +#define emit_shl16c STEPNAME(emit_shl16c) #define emit_shl32 STEPNAME(emit_shl32) #define emit_shl32c STEPNAME(emit_shl32c) #define emit_shr8 STEPNAME(emit_shr8) #define emit_shr16 STEPNAME(emit_shr16) +#define emit_shr16c STEPNAME(emit_shr16c) #define emit_shr32 STEPNAME(emit_shr32) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar16 STEPNAME(emit_sar16) +#define emit_sar16c STEPNAME(emit_sar16c) #define emit_sar32c STEPNAME(emit_sar32c) #define emit_shld32c STEPNAME(emit_shld32c) #define emit_shrd32c STEPNAME(emit_shrd32c) @@ -967,13 +970,16 @@ void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5); void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5); void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5); void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5); void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); |