diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-05-23 00:57:09 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-22 18:57:09 +0200 |
| commit | c9f3b990b41c73cf1f893e52e37db6aa3e7afff0 (patch) | |
| tree | 97f22bdde5dead412e675e46cc0a53bb689d54d1 /src | |
| parent | 6592b63800a97e851e5fd4d4a05b06663f1d286d (diff) | |
| download | box64-c9f3b990b41c73cf1f893e52e37db6aa3e7afff0.tar.gz box64-c9f3b990b41c73cf1f893e52e37db6aa3e7afff0.zip | |
[RV64_DYNAREC] Enable nativeflags optimization for more patterns (#2659)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_1.c | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 9 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_67.c | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_logic.c | 42 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 61 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 89 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_tests.c | 47 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 31 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 26 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 25 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 23 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_mapping.h | 1 |
16 files changed, 201 insertions, 176 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 03ecffa1..3e4fd754 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -86,6 +86,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x56: case 0x57: INST_NAME("PUSH reg"); + SCRATCH_USAGE(0); gd = TO_NAT((opcode & 0x07) + (rex.b << 3)); PUSH1z(gd); break; @@ -98,6 +99,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x5E: case 0x5F: INST_NAME("POP reg"); + SCRATCH_USAGE(0); gd = TO_NAT((opcode & 0x07) + (rex.b << 3)); POP1z(gd); break; @@ -151,6 +153,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOVSXD Gd, Ed"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (rex.w) { if (MODREG) { // reg <= reg ADDIW(gd, TO_NAT((nextop & 7) + (rex.b << 3)), 0); diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index ffc4d43a..7551cce4 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -400,6 +400,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGD; if (MODREG) { // reg <= reg + SCRATCH_USAGE(0); MVxw(TO_NAT((nextop & 7) + (rex.b << 3)), gd); } else { // mem <= reg IF_UNALIGNED(ip) { @@ -413,6 +414,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } } else { + SCRATCH_USAGE(0); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); SDxw(gd, ed, fixedaddress); } @@ -464,6 +466,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOV Gd, Ed"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (MODREG) { MVxw(gd, TO_NAT((nextop & 7) + (rex.b << 3))); } else { @@ -476,6 +479,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOV Ed, Seg"); nextop = F8; if (MODREG) { + SCRATCH_USAGE(0); LHU(TO_NAT((nextop & 7) + (rex.b << 3)), xEmu, offsetof(x64emu_t, segs[(nextop & 0x38) >> 3])); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); @@ -1042,6 +1046,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xB3: INST_NAME("MOV xL, Ib"); u8 = F8; + SCRATCH_USAGE(0); if (rex.rex) gb1 = TO_NAT((opcode & 7) + (rex.b << 3)); else @@ -1078,6 +1083,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xBF: INST_NAME("MOV Reg, Id"); gd = TO_NAT((opcode & 7) + (rex.b << 3)); + SCRATCH_USAGE(0); if (rex.w) { u64 = F64; MOV64x(gd, u64); diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index c10742db..868e55bb 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -373,9 +373,11 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ORI(eb1, eb1, u8); } } else { // mem <= u8 + SCRATCH_USAGE(0); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 1); u8 = F8; if (u8) { + SCRATCH_USAGE(1); ADDI(x3, xZR, u8); ed = x3; } else @@ -388,6 +390,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOV Ed, Id"); nextop = F8; if (MODREG) { // reg <= i32 + SCRATCH_USAGE(0); i64 = F32S; ed = TO_NAT((nextop & 7) + (rex.b << 3)); MOV64xw(ed, i64); @@ -409,9 +412,11 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } } else { + SCRATCH_USAGE(0); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4); i64 = F32S; if (i64) { + SCRATCH_USAGE(1); MOV64x(x3, i64); ed = x3; } else @@ -802,7 +807,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SLLW(ed, ed, xRCX); if (dyn->insts[ninst].nat_flags_fusion) { if (!rex.w) ZEROUP(ed); - NAT_FLAGS_OPS(ed, xZR); + NAT_FLAGS_OPS(ed, xZR, x5, xZR); } else if (!rex.w && MODREG) { ZEROUP(ed); } @@ -827,7 +832,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SRLW(ed, ed, xRCX); if (dyn->insts[ninst].nat_flags_fusion) { if (!rex.w) ZEROUP(ed); - NAT_FLAGS_OPS(ed, xZR); + NAT_FLAGS_OPS(ed, xZR, x5, xZR); } else if (!rex.w && MODREG) { ZEROUP(ed); } diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 82f8f632..b2f8c720 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -2077,6 +2077,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVZX Gd, Eb"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (MODREG) { if (rex.rex) { eb1 = TO_NAT((nextop & 7) + (rex.b << 3)); diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index f7134d6e..68ed8f84 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -403,6 +403,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x56: case 0x57: INST_NAME("PUSH reg"); + SCRATCH_USAGE(0); gd = TO_NAT((opcode & 0x07) + (rex.b << 3)); PUSH1_16(gd); break; @@ -683,6 +684,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INSHz(ed, gd, x2, x3, 1, 1); } } else { + SCRATCH_USAGE(0); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); SH(gd, ed, fixedaddress); SMWRITELOCK(lock); @@ -1176,7 +1178,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } wb1 = 0; EWBACK; - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(ed, xZR); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(ed, xZR, x5, xZR); } else { GETEW(x1, 0); u8 = (F8) & 0x1f; diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index 8fd60088..aeb6535d 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ b/src/dynarec/rv64/dynarec_rv64_67.c @@ -206,6 +206,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVXZ Gd, Eb"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (MODREG) { if (rex.rex) { eb1 = TO_NAT((nextop & 7) + (rex.b << 3)); @@ -231,6 +232,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVZX Gd, Ew"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (MODREG) { ed = TO_NAT((nextop & 7) + (rex.b << 3)); ZEXTH(gd, ed); @@ -482,6 +484,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVSXD Gd, Ed"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (rex.w) { if (MODREG) { // reg <= reg ADDIW(gd, TO_NAT((nextop & 7) + (rex.b << 3)), 0); @@ -516,6 +519,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni OR(ed, ed, x2); } } else { + SCRATCH_USAGE(0); addr = geted32(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); SH(gd, ed, fixedaddress); SMWRITELOCK(lock); @@ -706,6 +710,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOV Ed, Gd"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (MODREG) { // reg <= reg MVxw(TO_NAT((nextop & 7) + (rex.b << 3)), gd); } else { // mem <= reg @@ -718,6 +723,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOV Gd, Ed"); nextop = F8; GETGD; + SCRATCH_USAGE(0); if (MODREG) { MVxw(gd, TO_NAT((nextop & 7) + (rex.b << 3))); } else { @@ -770,6 +776,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOV Ed, Id"); nextop = F8; if (MODREG) { // reg <= i32 + SCRATCH_USAGE(0); i64 = F32S; ed = TO_NAT((nextop & 7) + (rex.b << 3)); MOV64xw(ed, i64); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c index 32edad65..ba4e991b 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c @@ -31,8 +31,6 @@ void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) XOR(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SRLI(s3, s1, 7); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -46,6 +44,7 @@ void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit XOR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -59,8 +58,6 @@ void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s } XORI(s1, s1, c & 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SRLI(s3, s1, 7); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -74,6 +71,7 @@ void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit XOR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -88,8 +86,6 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s XOR(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - // test sign bit before zeroup. IFX (X_SF) { if (!rex.w) SEXT_W(s1, s1); @@ -108,6 +104,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit XOR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch @@ -127,8 +124,6 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i XOR(s1, s1, s3); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - // test sign bit before zeroup. IFX (X_SF) { if (!rex.w) SEXT_W(s1, s1); @@ -146,6 +141,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit XOR16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) @@ -160,8 +156,6 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, XOR(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -178,6 +172,7 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit OR16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) @@ -192,8 +187,6 @@ void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) OR(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SD(s1, xEmu, offsetof(x64emu_t, res)); } @@ -209,6 +202,7 @@ void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit OR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -223,8 +217,6 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 OR(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { if (!rex.w) SEXT_W(s1, s1); SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -243,6 +235,7 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit OR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch @@ -262,8 +255,6 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in OR(s1, s1, s3); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { if (!rex.w) SEXT_W(s1, s1); SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -282,6 +273,7 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -297,8 +289,6 @@ void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) AND(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -312,6 +302,7 @@ void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -327,8 +318,6 @@ void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s ANDI(s1, s1, c & 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SD(s1, xEmu, offsetof(x64emu_t, res)); } @@ -342,6 +331,7 @@ void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) @@ -355,8 +345,6 @@ void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) AND(s1, s1, s2); // res = s1 & s2 - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -370,6 +358,7 @@ void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit AND32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -385,8 +374,6 @@ void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s AND(s1, s1, s2); // res = s1 & s2 if (!rex.w) ZEROUP(s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -400,6 +387,7 @@ void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit AND32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch @@ -420,8 +408,6 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } if (!rex.w && c < 0) ZEROUP(s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -435,6 +421,7 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) @@ -449,8 +436,6 @@ void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) OR(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -464,6 +449,7 @@ void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit OR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 131564a3..9c2291a9 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -60,8 +60,6 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ADDxw(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -97,6 +95,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch @@ -164,8 +163,6 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i ADDxw(s1, s1, s2); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -201,6 +198,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADD16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -220,8 +218,6 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } ADD(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); } @@ -263,6 +259,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -282,8 +279,6 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } ADD(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_AF | X_OF) { if (rv64_zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) @@ -322,6 +317,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch @@ -346,8 +342,6 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i } ADDI(s1, s1, c); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_AF | X_OF) { if (rv64_zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) @@ -386,6 +380,7 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -408,8 +403,6 @@ void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SUB(s1, s1, s2); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SRLI(s3, s1, 7); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -424,6 +417,7 @@ void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SUB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch @@ -451,7 +445,6 @@ void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } SUBW(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -469,6 +462,7 @@ void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -490,7 +484,6 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } SUBxw(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -508,6 +501,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -551,7 +545,6 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } SUBxw(s1, s1, s2); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_AF | X_CF | X_OF) { IFX (X_PEND) { @@ -575,6 +568,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit INC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch @@ -595,7 +589,6 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } ADDIW(s1, s1, 1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -631,6 +624,7 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -653,7 +647,6 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } ADDIW(s1, s1, -1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -684,6 +677,7 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit INC32 instruction, from s1, store result in s1 using s3 and s4 as scratch @@ -704,7 +698,6 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } ADDIxw(s1, s1, 1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -741,6 +734,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch @@ -762,7 +756,6 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } ADDIxw(s1, s1, -1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -794,6 +787,7 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch @@ -814,7 +808,6 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } ADDI(s1, s1, 1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -852,6 +845,7 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit DEC16 instruction, from s1, store result in s1 using s3 and s4 as scratch @@ -874,8 +868,6 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ADDIW(s1, s1, -1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -905,6 +897,7 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s5); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SBB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch @@ -928,8 +921,6 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SUBW(s1, s1, s3); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - CLEAR_FLAGS(); IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -946,6 +937,7 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADC8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -967,8 +959,6 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i ANDI(s3, xFlags, 1 << F_CF); ADD(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - CLEAR_FLAGS(); IFX (X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); @@ -1010,6 +1000,7 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADC8 instruction, from s1, const c, store result in s1 using s3, s4, s5 and s6 as scratch @@ -1046,8 +1037,6 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ANDI(s3, xFlags, 1 << F_CF); SUBW(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - CLEAR_FLAGS(); SLLIW(s1, s1, 16); IFX (X_SF) { @@ -1066,6 +1055,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SBB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch @@ -1088,8 +1078,6 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, xFlags, 1 << F_CF); SUBxw(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - CLEAR_FLAGS(); IFX (X_SF) { SET_FLAGS_LTZ(s1, F_SF, s4, s5); @@ -1109,6 +1097,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch @@ -1126,7 +1115,6 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } NEGxw(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -1164,6 +1152,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit NEG16 instruction, from s1, store result in s1 using s2 and s3 as scratch @@ -1182,7 +1171,6 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) NEG(s1, s1); ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -1219,6 +1207,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit NEG8 instruction, from s1, store result in s1 using s2 and s3 as scratch @@ -1237,7 +1226,6 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) NEG(s1, s1); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -1273,6 +1261,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -1294,8 +1283,6 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ANDI(s3, xFlags, 1 << F_CF); ADD(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - CLEAR_FLAGS(); IFX (X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); @@ -1337,6 +1324,7 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -1384,8 +1372,6 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, xFlags, 1 << F_CF); ADDxw(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - CLEAR_FLAGS(); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -1425,4 +1411,5 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s5); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index ed3e4adf..1bba5289 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -49,7 +49,6 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SET_FLAGS_LTZ(s1, F_SF, s3, s4); } SRLI(s1, s1, 56); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -69,6 +68,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } else { IFX (X_CF) { if (c == 8) { @@ -77,7 +77,6 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } } MV(s1, xZR); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(xZR, xZR); IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -93,6 +92,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int ORI(xFlags, xFlags, 1 << F_PF); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(xZR, xZR, xZR, xZR); } } @@ -133,7 +133,6 @@ void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); // SF should be unset IFX (X_PEND) { @@ -145,6 +144,7 @@ void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SAR8 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch @@ -181,8 +181,6 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -192,6 +190,7 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHL8 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch @@ -223,8 +222,6 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } SRLI(s1, s1, 56); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -243,6 +240,7 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch @@ -279,8 +277,6 @@ void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SRL(s1, s1, s2); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - // SF should be unset IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -291,6 +287,7 @@ void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SAR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3, s4 and s5 as scratch @@ -323,8 +320,6 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SRL(s1, s1, s2); ANDI(s1, s1, 0xff); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -334,6 +329,7 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHL16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch @@ -367,8 +363,6 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } SRLI(s1, s1, 48); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -387,6 +381,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } else { IFX (X_CF) { if (c == 16) { @@ -395,7 +390,6 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } } MV(s1, xZR); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(xZR, xZR); IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -411,6 +405,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int ORI(xFlags, xFlags, 1 << F_PF); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(xZR, xZR, xZR, xZR); } } @@ -450,8 +445,6 @@ void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - // SF should be unset IFX (X_PEND) { @@ -463,6 +456,7 @@ void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SAR16 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch @@ -499,8 +493,6 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -510,6 +502,7 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -542,8 +535,6 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } SRLI(s1, s1, 48); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -562,6 +553,7 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHR16 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch @@ -598,8 +590,6 @@ void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SRL(s1, s1, s2); ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - // SF should be unset IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -610,6 +600,7 @@ void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SAR16 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3, s4 and s5 as scratch @@ -642,8 +633,6 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SRL(s1, s1, s2); ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -653,6 +642,7 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch @@ -684,8 +674,6 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SLLW(s1, s1, s2); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -710,6 +698,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHL32 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5) @@ -743,8 +732,6 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SLLIW(s1, s1, c); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -769,6 +756,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch @@ -804,8 +792,6 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SRL(s1, s1, s2); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -821,6 +807,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -870,8 +857,6 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRLIW(s1, s1, c); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -887,6 +872,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -929,8 +915,6 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRAIW(s1, s1, c); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - // SRAIW sign-extends, so test sign bit before clearing upper bits IFX (X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -947,6 +931,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ROL32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -979,7 +964,6 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SRLxw(s1, s1, s4); OR(s1, s3, s1); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -1001,6 +985,7 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s OR(xFlags, xFlags, s3); MARK; } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ROR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -1033,7 +1018,6 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SLLxw(s1, s1, s4); OR(s1, s3, s1); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -1057,6 +1041,7 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s OR(xFlags, xFlags, s3); MARK; } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ROL16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch @@ -1073,8 +1058,6 @@ void emit_rol16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int ZEXTH(s1, s1); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_CF | X_OF) { ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2)); ANDI(s4, s1, 1 << F_CF); @@ -1088,6 +1071,7 @@ void emit_rol16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int OR(xFlags, xFlags, s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -1121,8 +1105,6 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, if (!rex.w) ZEROUP(s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1139,6 +1121,7 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, OR(xFlags, xFlags, s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -1155,8 +1138,6 @@ void emit_ror16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int ZEXTH(s1, s1); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_CF | X_OF) { ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2)); } @@ -1176,6 +1157,7 @@ void emit_ror16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int OR(xFlags, xFlags, s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -1209,8 +1191,6 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, if (!rex.w) ZEROUP(s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1229,6 +1209,7 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, OR(xFlags, xFlags, s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch @@ -1273,8 +1254,6 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin SLLIxw(s1, s2, (rex.w ? 64 : 32) - c); OR(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - if (!rex.w) { ZEROUP(s1); } @@ -1300,6 +1279,7 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) @@ -1352,7 +1332,6 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin OR(s1, s1, s5); } ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -1377,6 +1356,7 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) @@ -1423,7 +1403,6 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin if (!rex.w) { ZEROUP(s1); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -1447,6 +1426,7 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } @@ -1477,8 +1457,6 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int SLLxw(s4, s2, s4); OR(s1, s4, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1504,6 +1482,7 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6) @@ -1539,7 +1518,6 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int if (!rex.w) { ZEROUP(s1); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_OF) { ADDI(s5, s5, -1); @@ -1560,6 +1538,7 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5) @@ -1611,7 +1590,6 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin OR(s1, s1, s3); } ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -1636,6 +1614,7 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit RCL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -1657,8 +1636,6 @@ void emit_rcl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, 17 - c); OR(s1, s1, s3); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_CF | X_OF) { ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2)); SRLI(s4, s4, 63); @@ -1673,6 +1650,7 @@ void emit_rcl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int OR(xFlags, xFlags, s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit RCR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch @@ -1699,8 +1677,6 @@ void emit_rcr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SLLI(s4, s1, 47); ZEXTH(s1, s1); - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_CF | X_OF) { ANDI(xFlags, xFlags, ~(1UL << F_CF | 1UL << F_OF2)); } @@ -1720,4 +1696,5 @@ void emit_rcr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int OR(xFlags, xFlags, s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c index acdb9275..23404365 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c @@ -65,9 +65,9 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SRAI(s3, s3, 56); SLLI(s4, s2, 56); SRAI(s4, s4, 56); - NAT_FLAGS_OPS(s3, s4); + NAT_FLAGS_OPS(s3, s4, s5, xZR); } else { - NAT_FLAGS_OPS(s1, s2); + NAT_FLAGS_OPS(s1, s2, s3, xZR); } } } @@ -101,9 +101,9 @@ void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) if (dyn->insts[ninst].nat_flags_needsign) { SLLI(s3, s1, 56); SRAI(s3, s3, 56); - NAT_FLAGS_OPS(s3, xZR); + NAT_FLAGS_OPS(s3, xZR, s4, xZR); } else { - NAT_FLAGS_OPS(s1, xZR); + NAT_FLAGS_OPS(s1, xZR, s3, xZR); } } } @@ -154,9 +154,9 @@ void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SRAI(s3, s3, 48); SLLI(s4, s2, 48); SRAI(s4, s4, 48); - NAT_FLAGS_OPS(s3, s4); + NAT_FLAGS_OPS(s3, s4, s5, xZR); } else { - NAT_FLAGS_OPS(s1, s2); + NAT_FLAGS_OPS(s1, s2, s3, xZR); } } } @@ -190,9 +190,9 @@ void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) if (dyn->insts[ninst].nat_flags_needsign) { SLLI(s3, s1, 48); SRAI(s3, s3, 48); - NAT_FLAGS_OPS(s3, xZR); + NAT_FLAGS_OPS(s3, xZR, s4, xZR); } else { - NAT_FLAGS_OPS(s1, xZR); + NAT_FLAGS_OPS(s1, xZR, s3, xZR); } } } @@ -237,9 +237,9 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) { - if (rex.w) - NAT_FLAGS_OPS(s1, s2); - else { + if (rex.w) { + NAT_FLAGS_OPS(s1, s2, s3, s4); + } else { if (dyn->insts[ninst].nat_flags_needsign) { SEXT_W(s3, s1); SEXT_W(s4, s2); @@ -247,7 +247,7 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ZEXTW2(s3, s1); ZEXTW2(s4, s2); } - NAT_FLAGS_OPS(s3, s4); + NAT_FLAGS_OPS(s3, s4, s5, xZR); } } } @@ -290,16 +290,16 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, uint8_t nextop, int NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) { if (rex.w) - NAT_FLAGS_OPS(s1, xZR); + NAT_FLAGS_OPS(s1, xZR, s3, xZR); else { if (dyn->insts[ninst].nat_flags_needsign) { SEXT_W(s3, s1); - NAT_FLAGS_OPS(s3, xZR); + NAT_FLAGS_OPS(s3, xZR, s4, xZR); } else if (res == s5) { // zero-up'd case - NAT_FLAGS_OPS(s5, xZR); + NAT_FLAGS_OPS(s5, xZR, s4, xZR); } else { ZEXTW2(s3, s1); - NAT_FLAGS_OPS(s3, xZR); + NAT_FLAGS_OPS(s3, xZR, s4, xZR); } } } @@ -321,8 +321,6 @@ void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SD(s1, xEmu, offsetof(x64emu_t, res)); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); - IFX (X_SF) { SRLI(s4, s1, 7); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -339,6 +337,7 @@ void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SLLI(s1, s1, 56); SRAI(s1, s1, 56); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR); } // emit TEST8 instruction, from test s1, c, using s3, s4 and s5 as scratch @@ -357,8 +356,6 @@ void emit_test8c(dynarec_rv64_t* dyn, int ninst, int s1, uint8_t c, int s3, int SD(s3, xEmu, offsetof(x64emu_t, res)); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - IFX (X_SF) { SRLI(s4, s3, 7); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -375,6 +372,7 @@ void emit_test8c(dynarec_rv64_t* dyn, int ninst, int s1, uint8_t c, int s3, int SLLI(s3, s3, 56); SRAI(s3, s3, 56); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR, xZR, xZR); } // emit TEST16 instruction, from test s1, s2, using s3, s4 and s5 as scratch @@ -393,8 +391,6 @@ void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s3, xEmu, offsetof(x64emu_t, res)); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - IFX (X_SF) { SRLI(s4, s3, 15); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -411,6 +407,7 @@ void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SLLI(s3, s3, 48); SRAI(s3, s3, 48); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR, xZR, xZR); } // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch @@ -433,8 +430,6 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int if (!rex.w) ZEROUP(s3); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - IFX (X_SF) { SRLI(s4, s3, rex.w ? 63 : 31); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -456,6 +451,7 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int ZEROUP(s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR, xZR, xZR); } // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch @@ -483,8 +479,6 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, SDxw(s3, xEmu, offsetof(x64emu_t, res)); } - if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - IFX (X_SF) { SRLI(s4, s3, rex.w ? 63 : 31); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -506,4 +500,5 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, ZEROUP(s3); } } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR, xZR, xZR); } diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index f6f9a683..b5149486 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -705,7 +705,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r if (!dyn->need_dump && !BOX64ENV(dynarec_gdbjit) && !BOX64ENV(dynarec_perf_map)) return; static char buf[256]; - int length = sprintf(buf, "barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, fuse=%d, sm=%d(%d/%d), sew@entry=%d, sew@exit=%d", + int length = sprintf(buf, "barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, fuse=%d/%d, sm=%d(%d/%d), sew@entry=%d, sew@exit=%d", dyn->insts[ninst].x64.barrier, dyn->insts[ninst].x64.state_flags, dyn->f.pending, @@ -717,6 +717,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dyn->insts[ninst].x64.need_before, dyn->insts[ninst].x64.need_after, dyn->insts[ninst].nat_flags_fusion, + dyn->insts[ninst].no_scratch_usage, dyn->smwrite, dyn->insts[ninst].will_write, dyn->insts[ninst].last_write, dyn->insts[ninst].vector_sew_entry, dyn->insts[ninst].vector_sew_exit); if (dyn->insts[ninst].pred_sz) { @@ -858,15 +859,27 @@ void updateNativeFlags(dynarec_rv64_t* dyn) return; for (int i = 1; i < dyn->size; ++i) if (dyn->insts[i].nat_flags_fusion) { - if (dyn->insts[i].pred_sz == 1 && dyn->insts[i].pred[0] == i - 1 - && (dyn->insts[i].x64.use_flags & dyn->insts[i - 1].x64.set_flags) == dyn->insts[i].x64.use_flags) { - dyn->insts[i - 1].nat_flags_fusion = 1; - if (dyn->insts[i].x64.use_flags & X_SF) { - dyn->insts[i - 1].nat_flags_needsign = 1; + int j = i - 1; + int found = 0; + if (dyn->insts[i].pred_sz == 1 && dyn->insts[i].pred[0] == j) { + while (j >= 0) { + if (dyn->insts[j].x64.set_flags && (dyn->insts[i].x64.use_flags & dyn->insts[j].x64.set_flags) == dyn->insts[i].x64.use_flags) { + dyn->insts[j].nat_flags_fusion = 1; + if (dyn->insts[i].x64.use_flags & X_SF) { + dyn->insts[j].nat_flags_needsign = 1; + } + dyn->insts[i].x64.use_flags = 0; + dyn->insts[j].nat_next_inst = i; + found = 1; + break; + } else if (j && dyn->insts[j].pred_sz == 1 && dyn->insts[j].pred[0] == j - 1 + && dyn->insts[j].no_scratch_usage && !dyn->insts[j].x64.set_flags && !dyn->insts[j].x64.use_flags) { + j -= 1; + } else + break; } - dyn->insts[i].x64.use_flags = 0; - } else - dyn->insts[i].nat_flags_fusion = 0; + } + if (!found) dyn->insts[i].nat_flags_fusion = 0; } } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 73b0ea73..ce95d9e4 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -62,9 +62,11 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) { MOV64x(scratch, tmp); ADDSL(ret, scratch, TO_NAT(sib_reg), sib >> 6, ret); + SCRATCH_USAGE(1); } else { if (sib >> 6) { SLLI(ret, TO_NAT(sib_reg), (sib >> 6)); + SCRATCH_USAGE(1); } else ret = TO_NAT(sib_reg); *fixaddress = tmp; @@ -77,10 +79,12 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, break; } MOV64x(ret, tmp); + SCRATCH_USAGE(1); } } else { if (sib_reg != 4) { ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch); + SCRATCH_USAGE(1); } else { ret = TO_NAT(sib_reg2); } @@ -95,13 +99,17 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, GETIP(addr + delta, scratch); ret = xRIP; *fixaddress = tmp; + SCRATCH_USAGE(1); } else if (adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) { ADDI(ret, xRIP, tmp + adj); + SCRATCH_USAGE(1); } else if ((tmp >= -2048) && (tmp <= maxval)) { GETIP(addr + delta, scratch); ADDI(ret, xRIP, tmp); + SCRATCH_USAGE(1); } else if (tmp + addr + delta < 0x100000000LL) { MOV64x(ret, tmp + addr + delta); + SCRATCH_USAGE(1); } else { if (adj) { MOV64x(ret, tmp + adj); @@ -110,6 +118,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, GETIP(addr + delta, scratch); } ADD(ret, ret, xRIP); + SCRATCH_USAGE(1); } switch (lock) { case 1: addLockAddress(addr + delta + tmp); break; @@ -138,6 +147,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if ((nextop & 7) == 4) { if (sib_reg != 4) { ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch); + SCRATCH_USAGE(1); } else { ret = TO_NAT(sib_reg2); } @@ -154,19 +164,23 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, } else scratch = TO_NAT((nextop & 0x07) + (rex.b << 3)); ADDI(ret, scratch, i64); + SCRATCH_USAGE(1); } else { MOV64x(scratch, i64); if ((nextop & 7) == 4) { if (sib_reg != 4) { ADD(scratch, scratch, TO_NAT(sib_reg2)); ADDSL(ret, scratch, TO_NAT(sib_reg), sib >> 6, ret); + SCRATCH_USAGE(1); } else { PASS3(int tmp = TO_NAT(sib_reg2)); ADD(ret, tmp, scratch); + SCRATCH_USAGE(1); } } else { PASS3(int tmp = TO_NAT((nextop & 0x07) + (rex.b << 3))); ADD(ret, tmp, scratch); + SCRATCH_USAGE(1); } } } @@ -208,10 +222,12 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ ADDW(ret, TO_NAT(sib_reg), scratch); } ZEROUP(ret); + SCRATCH_USAGE(1); } else { - if (sib >> 6) + if (sib >> 6) { SLLI(ret, TO_NAT(sib_reg), (sib >> 6)); - else + SCRATCH_USAGE(1); + } else ret = TO_NAT(sib_reg); *fixaddress = tmp; } @@ -223,6 +239,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ break; } MOV32w(ret, tmp); + SCRATCH_USAGE(1); } } else { if (sib_reg != 4) { @@ -233,6 +250,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ ADDW(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); } ZEROUP(ret); + SCRATCH_USAGE(1); } else { ret = TO_NAT(sib_reg2); } @@ -240,6 +258,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ } else if ((nextop & 7) == 5) { uint32_t tmp = F32; MOV32w(ret, tmp); + SCRATCH_USAGE(1); switch (lock) { case 1: addLockAddress(tmp); break; case 2: @@ -276,6 +295,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ ADDW(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); } ZEROUP(ret); + SCRATCH_USAGE(1); } else { ret = TO_NAT(sib_reg2); } @@ -298,6 +318,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ scratch = TO_NAT(nextop & 0x07); ADDIW(ret, scratch, i32); ZEROUP(ret); + SCRATCH_USAGE(1); } else { // no need to zero up, as we did it below rv64_move32(dyn, ninst, scratch, i32, 0); @@ -318,6 +339,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_ ADDW(ret, tmp, scratch); } ZEROUP(ret); + SCRATCH_USAGE(1); } } } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 5eb8ac69..37e8518c 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1066,10 +1066,18 @@ READFLAGS(A) #endif -#define NAT_FLAGS_OPS(op1, op2) \ - do { \ - dyn->insts[ninst + 1].nat_flags_op1 = op1; \ - dyn->insts[ninst + 1].nat_flags_op2 = op2; \ +#define NAT_FLAGS_OPS(op1, op2, s1, s2) \ + do { \ + dyn->insts[dyn->insts[ninst].nat_next_inst].nat_flags_op1 = op1; \ + dyn->insts[dyn->insts[ninst].nat_next_inst].nat_flags_op2 = op2; \ + if (dyn->insts[ninst + 1].no_scratch_usage && IS_GPR(op1)) { \ + MV(s1, op1); \ + dyn->insts[dyn->insts[ninst].nat_next_inst].nat_flags_op1 = s1; \ + } \ + if (dyn->insts[ninst + 1].no_scratch_usage && IS_GPR(op2)) { \ + MV(s2, op2); \ + dyn->insts[dyn->insts[ninst].nat_next_inst].nat_flags_op2 = s2; \ + } \ } while (0) #define NAT_FLAGS_ENABLE_CARRY() dyn->insts[ninst].nat_flags_carry = 1 @@ -1951,7 +1959,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } \ if (dyn->insts[ninst].nat_flags_fusion) { \ ANDI(s1, dst, 0xff); \ - NAT_FLAGS_OPS(s1, xZR); \ + NAT_FLAGS_OPS(s1, xZR, xZR, xZR); \ } \ break; \ } @@ -1974,7 +1982,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } \ if (dyn->insts[ninst].nat_flags_fusion) { \ ZEXTH(s1, dst); \ - NAT_FLAGS_OPS(s1, xZR); \ + NAT_FLAGS_OPS(s1, xZR, xZR, xZR); \ } \ break; \ } @@ -1994,4 +2002,9 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #define VECTOR_LOAD_VMASK(mask, s1, multiple) \ vector_loadmask(dyn, ninst, VMASK, mask, s1, multiple) +#define SCRATCH_USAGE(usage) \ + do { \ + dyn->insts[ninst].no_scratch_usage = !usage; \ + } while (0) + #endif //__DYNAREC_RV64_HELPER_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 416e8bb0..a5cefa8c 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -14,15 +14,20 @@ dyn->f.dfnone = 1; \ dyn->f.pending = SF_SET -#define READFLAGS_FUSION(A, s1, s2, s3, s4, s5) \ - if (BOX64ENV(dynarec_nativeflags) && ninst > 0 && !dyn->insts[ninst - 1].nat_flags_nofusion) { \ - if ((A) == (X_ZF)) \ - dyn->insts[ninst].nat_flags_fusion = 1; \ - else if (dyn->insts[ninst - 1].nat_flags_carry && ((A) == (X_CF) || (A) == (X_CF | X_ZF))) \ - dyn->insts[ninst].nat_flags_fusion = 1; \ - else if (dyn->insts[ninst - 1].nat_flags_sign && ((A) == (X_SF | X_OF) || (A) == (X_SF | X_OF | X_ZF))) \ - dyn->insts[ninst].nat_flags_fusion = 1; \ - } \ +#define READFLAGS_FUSION(A, s1, s2, s3, s4, s5) \ + if (BOX64ENV(dynarec_nativeflags) && ninst > 0) { \ + int prev = ninst - 1; \ + while (prev && dyn->insts[prev].no_scratch_usage) \ + prev -= 1; \ + if (!dyn->insts[prev].nat_flags_nofusion) { \ + if ((A) == (X_ZF)) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + else if (dyn->insts[prev].nat_flags_carry && ((A) == (X_CF) || (A) == (X_CF | X_ZF))) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + else if (dyn->insts[prev].nat_flags_sign && ((A) == (X_SF | X_OF) || (A) == (X_SF | X_OF | X_ZF))) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + } \ + } \ READFLAGS(A); #define SETFLAGS(A, B, FUSION) \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 16ea574f..e32aa69e 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -138,8 +138,10 @@ typedef struct instruction_rv64_s { uint8_t nat_flags_needsign:1; uint8_t unaligned:1; // this opcode can be re-generated for unaligned special case uint8_t x87precision:1; // this opcode can handle x87pc + uint8_t no_scratch_usage:1; // this opcode does not use scratch register uint8_t nat_flags_op1; uint8_t nat_flags_op2; + uint16_t nat_next_inst; flagcache_t f_exit; // flags status at end of instruction extcache_t e; // extcache at end of instruction (but before poping) flagcache_t f_entry; // flags status before the instruction begin diff --git a/src/dynarec/rv64/rv64_mapping.h b/src/dynarec/rv64/rv64_mapping.h index 53e71f3c..ac7ec914 100644 --- a/src/dynarec/rv64/rv64_mapping.h +++ b/src/dynarec/rv64/rv64_mapping.h @@ -65,6 +65,7 @@ x31 t6 x6 Temporary Scratch // convert a x86 register to native according to the register mapping #define TO_NAT(A) (((uint8_t[]) { 16, 13, 12, 24, 9, 8, 11, 10, 14, 15, 26, 27, 18, 19, 20, 21 })[(A)]) +#define IS_GPR(A) (((uint8_t[]) { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0 })[(A)]) #define x1 6 #define x2 7 |