diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-08-28 02:23:42 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-27 20:23:42 +0200 |
| commit | 875a2ef3a9c943edbf497088df72e5fbcacd8728 (patch) | |
| tree | 46622576593264affa330da7eb72566ba51b6fbf | |
| parent | bfbf18688beec811b9e936908db12e0ca1671430 (diff) | |
| download | box64-875a2ef3a9c943edbf497088df72e5fbcacd8728.tar.gz box64-875a2ef3a9c943edbf497088df72e5fbcacd8728.zip | |
[RV64_DYNAREC] Added a fast path for some 16bit opcodes (#1765)
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 112 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 19 |
2 files changed, 73 insertions, 58 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index a90bdb77..5ad815af 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -63,6 +63,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADD Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, ADD(ed, ed, x1));
             GETGW(x2);
             GETEW(x1, 0);
             emit_add16(dyn, ninst, x1, x2, x4, x5, x6);
@@ -72,6 +73,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADD Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, ADD(gd, gd, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_add16(dyn, ninst, x1, x2, x5, x4, x6);
@@ -101,6 +103,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("OR Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, OR(ed, ed, x1));
             GETGW(x2);
             GETEW(x1, 0);
             emit_or16(dyn, ninst, x1, x2, x4, x5);
@@ -110,6 +113,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("OR Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, OR(gd, gd, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_or16(dyn, ninst, x1, x2, x4, x5);
@@ -143,6 +147,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, {
+                ADD(ed, ed, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                ADD(ed, ed, x2);
+            });
             GETGW(x2);
             GETEW(x1, 0);
             emit_adc16(dyn, ninst, x1, x2, x4, x6, x5);
@@ -153,6 +163,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, {
+                ADD(gd, gd, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                ADD(gd, gd, x2);
+            });
             GETGW(x1);
             GETEW(x2, 0);
             emit_adc16(dyn, ninst, x1, x2, x4, x6, x5);
@@ -173,6 +189,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, {
+                SUB(ed, ed, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                SUB(ed, ed, x2);
+            });
             GETGW(x2);
             GETEW(x1, 0);
             emit_sbb16(dyn, ninst, x1, x2, x4, x5, x6);
@@ -183,6 +205,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, {
+                SUB(gd, gd, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                SUB(gd, gd, x2);
+            });
             GETGW(x1);
             GETEW(x2, 0);
             emit_sbb16(dyn, ninst, x1, x2, x6, x4, x5);
@@ -240,6 +268,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SUB Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, SUB(ed, ed, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_sub16(dyn, ninst, x2, x1, x4, x5, x6);
@@ -249,6 +278,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SUB Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, SUB(gd, gd, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_sub16(dyn, ninst, x1, x2, x6, x4, x5);
@@ -267,73 +297,39 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("XOR Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            // try to determine ed and gd
-            ed = 0;
-            GETGD;
-            if (MODREG) {
-                GETED(0);
-            }
-            if (ed == gd) {
-                // optimize XOR Gw, Gw
-                CLEAR_FLAGS();
-                IFX(X_PEND) {
-                    SET_DF(x6, d_xor16);
-                } else IFX(X_ALL) {
-                    SET_DFNONE();
-                }
-                SRLI(ed, ed, 16);
-                SLLI(ed, ed, 16);
-                IFX(X_PEND) {
-                    SH(ed, xEmu, offsetof(x64emu_t, res));
-                }
-                IFX(X_ZF) {
-                    ORI(xFlags, xFlags, 1 << F_ZF);
-                }
-                IFX(X_PF) {
-                    ORI(xFlags, xFlags, 1 << F_PF);
+            if (MODREG && !dyn->insts[ninst].x64.gen_flags) {
+                gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3);
+                ed = xRAX + (nextop & 7) + (rex.b << 3);
+                if (ed == gd) {
+                    SRLI(ed, ed, 16);
+                    SLLI(ed, ed, 16);
+                    break;
                 }
-            } else {
-                GETGW(x2);
-                GETEW(x1, 0);
-                emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
-                EWBACK;
             }
+            FAST_16BIT_OPERATION(ed, gd, x1, XOR(ed, ed, x1));
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
+            EWBACK;
             break;
         case 0x33:
             INST_NAME("XOR Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            // try to determine ed and gd
-            ed = 0;
-            GETGD;
-            if (MODREG) {
-                GETED(0);
-            }
-            if (ed == gd) {
-                // optimize XOR Gw, Gw
-                CLEAR_FLAGS();
-                IFX(X_PEND) {
-                    SET_DF(x6, d_xor16);
-                } else IFX(X_ALL) {
-                    SET_DFNONE();
-                }
-                SRLI(ed, ed, 16);
-                SLLI(ed, ed, 16);
-                IFX(X_PEND) {
-                    SH(ed, xEmu, offsetof(x64emu_t, res));
-                }
-                IFX(X_ZF) {
-                    ORI(xFlags, xFlags, 1 << F_ZF);
-                }
-                IFX(X_PF) {
-                    ORI(xFlags, xFlags, 1 << F_PF);
+            if (MODREG && !dyn->insts[ninst].x64.gen_flags) {
+                gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3);
+                ed = xRAX + (nextop & 7) + (rex.b << 3);
+                if (ed == gd) {
+                    SRLI(gd, gd, 16);
+                    SLLI(gd, gd, 16);
+                    break;
                 }
-            } else {
-                GETGW(x1);
-                GETEW(x2, 0);
-                emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
-                GWBACK;
             }
+            FAST_16BIT_OPERATION(gd, ed, x1, XOR(gd, gd, x1));
+            GETGW(x1);
+            GETEW(x2, 0);
+            emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
+            GWBACK;
             break;
         case 0x35:
             INST_NAME("XOR AX, Iw");
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 200d7f11..b2579454 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1774,4 +1774,23 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         break; \
     }
 
+#define FAST_16BIT_OPERATION(dst, src, s1, OP) \
+    if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \
+        gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \
+        ed = xRAX + (nextop & 7) + (rex.b << 3); \
+        SLLI(s1, src, 64 - 16); \
+        if (rv64_zbb) { \
+            RORI(dst, dst, 16); \
+        } else { \
+            TH_SRRI(dst, dst, 16); \
+        } \
+        OP; \
+        if (rv64_zbb) { \
+            RORI(dst, dst, 64 - 16); \
+        } else { \
+            TH_SRRI(dst, dst, 64 - 16); \
+        } \
+        break; \
+    }
+
 #endif //__DYNAREC_RV64_HELPER_H__