From 9de694c46bcd665ea6a91cce848d49144e6cec2f Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Wed, 28 Aug 2024 00:55:11 +0800 Subject: [RV64_DYNAREC] Added a fast path for some 8bit opcodes (#1763) * [RV64_DYNAREC] Added a fast path for some 8bit opcodes * fix * more * more --- src/dynarec/rv64/dynarec_rv64_00_0.c | 34 +++++++++++++++++++++++++++++++++- src/dynarec/rv64/dynarec_rv64_67.c | 12 +++++++++++- src/dynarec/rv64/dynarec_rv64_helper.h | 31 +++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c index 24eabbb9..cb50df7f 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_0.c +++ b/src/dynarec/rv64/dynarec_rv64_00_0.c @@ -33,7 +33,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int int32_t i32, tmp; int64_t i64, j64; uint8_t u8; - uint8_t gb1, gb2, eb1, eb2; + uint8_t gb, gb1, gb2, eb1, eb2; uint32_t u32; uint64_t u64; uint8_t wback, wb1, wb2, wb; @@ -54,6 +54,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("ADD Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(wb, gb, x1, ADD(wb, wb, x1)); GETEB(x1, 0); GETGB(x2); emit_add8(dyn, ninst, x1, x2, x4, x5); @@ -72,6 +73,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("ADD Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1)); GETEB(x1, 0); GETGB(x2); emit_add8(dyn, ninst, x2, x1, x4, x5); @@ -123,6 +125,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("OR Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(wb, gb, x1, OR(wb, wb, x1)); GETEB(x1, 0); GETGB(x2); emit_or8(dyn, ninst, x1, x2, x4, x5); @@ -141,6 +144,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("OR Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, OR(gb, gb, x1)); GETEB(x1, 0); GETGB(x2); emit_or8(dyn, ninst, x2, x1, x4, x5); @@ -190,6 +194,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int READFLAGS(X_CF); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(wb, gb, x1, { + ADD(wb, wb, x1); + ANDI(x2, xFlags, 1 << F_CF); + SLLI(x2, x2, 64 - 8); + ADD(wb, wb, x2); + }); GETEB(x1, 0); GETGB(x2); emit_adc8(dyn, ninst, x1, x2, x4, x5, x6); @@ -210,6 +220,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int READFLAGS(X_CF); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, { + ADD(gb, gb, x1); + ANDI(x2, xFlags, 1 << F_CF); + SLLI(x2, x2, 64 - 8); + ADD(gb, gb, x2); + }); GETEB(x2, 0); GETGB(x1); emit_adc8(dyn, ninst, x1, x2, x4, x6, x5); @@ -268,6 +284,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int READFLAGS(X_CF); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(wb, gb, x1, { + SUB(wb, wb, x1); + ANDI(x2, xFlags, 1 << F_CF); + SLLI(x2, x2, 64 - 8); + SUB(wb, wb, x2); + }); GETEB(x1, 0); GETGB(x2); emit_sbb8(dyn, ninst, x1, x2, x4, x5, x6); @@ -288,6 +310,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int READFLAGS(X_CF); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, { + SUB(gb, gb, x1); + ANDI(x2, xFlags, 1 << F_CF); + SLLI(x2, x2, 64 - 8); + SUB(gb, gb, x2); + }); GETEB(x2, 0); GETGB(x1); emit_sbb8(dyn, ninst, x1, x2, x6, x4, x5); @@ -395,6 +423,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SUB Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(wb, gb, x1, SUB(wb, wb, x1)); GETEB(x1, 0); GETGB(x2); emit_sub8(dyn, ninst, x1, x2, x4, x5, x6); @@ -413,6 +442,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SUB Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, SUB(gb, gb, x1)); GETEB(x1, 0); GETGB(x2); emit_sub8(dyn, ninst, x2, x1, x4, x5, x6); @@ -445,6 +475,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("XOR Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(wb, gb, x1, XOR(wb, wb, x1)); GETEB(x1, 0); GETGB(x2); emit_xor8(dyn, ninst, x1, x2, x4, x5); @@ -465,6 +496,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("XOR Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, XOR(gb, gb, x1)); GETEB(x1, 0); GETGB(x2); emit_xor8(dyn, ninst, x2, x1, x4, x5); diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index f1dd0549..59fc507a 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ b/src/dynarec/rv64/dynarec_rv64_67.c @@ -28,7 +28,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uint8_t opcode = F8; uint8_t nextop; - uint8_t gd, ed, wback, wb, wb1, wb2, gb1, gb2, eb1, eb2; + uint8_t gd, ed, wback, wb, wb1, wb2, gb, gb1, gb2, eb1, eb2; int64_t fixedaddress; int unscaled; int8_t i8; @@ -75,6 +75,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("ADD Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1)); GETEB32(x2, 0); GETGB(x1); emit_add8(dyn, ninst, x1, x2, x3, x4); @@ -109,6 +110,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("OR Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, OR(gb, gb, x1)); GETEB32(x2, 0); GETGB(x1); emit_or8(dyn, ninst, x1, x2, x3, x4); @@ -268,6 +270,12 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni READFLAGS(X_CF); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, { + SUB(gb, gb, x1); + ANDI(x2, xFlags, 1 << F_CF); + SLLI(x2, x2, 64 - 8); + SUB(gb, gb, x2); + }); GETEB32(x2, 0); GETGB(x1); emit_sbb8(dyn, ninst, x1, x2, x3, x4, x5); @@ -339,6 +347,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SUB Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, SUB(gb, gb, x1)); GETEB32(x2, 0); GETGB(x1); emit_sub8(dyn, ninst, x1, x2, x3, x4, x5); @@ -373,6 +382,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("XOR Gb, Eb"); SETFLAGS(X_ALL, SF_SET_PENDING); nextop = F8; + FAST_8BIT_OPERATION(gb, wb, x1, XOR(gb, gb, x1)); GETEB32(x2, 0); GETGB(x1); emit_xor8(dyn, ninst, x1, x2, x3, x4); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index e1082c1f..200d7f11 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1743,4 +1743,35 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i BLT(reg, s, 4 + 4); \ ADDIW(reg, s, -1); +#define FAST_8BIT_OPERATION(dst, src, s1, OP) \ + if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + if (rex.rex) { \ + wb = xRAX + (nextop & 7) + (rex.b << 3); \ + wb2 = 0; \ + gb = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + gb2 = 0; \ + } else { \ + wb = (nextop & 7); \ + wb2 = (wb >> 2) * 8; \ + wb = xRAX + (wb & 3); \ + gd = (nextop & 0x38) >> 3; \ + gb2 = ((gd & 4) >> 2) * 8; \ + gb = xRAX + (gd & 3); \ + } \ + if (src##2) { ANDI(s1, src, 0xf00); } \ + SLLI(s1, (src##2 ? s1 : src), 64 - src##2 - 8); \ + if (rv64_zbb) { \ + RORI(dst, dst, 8 + dst##2); \ + } else { \ + TH_SRRI(dst, dst, 8 + dst##2); \ + } \ + OP; \ + if (rv64_zbb) { \ + RORI(dst, dst, 64 - 8 - dst##2); \ + } else { \ + TH_SRRI(dst, dst, 64 - 8 - dst##2); \ + } \ + break; \ + } + #endif //__DYNAREC_RV64_HELPER_H__ -- cgit 1.4.1