diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-12-02 23:58:20 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-02 16:58:20 +0100 |
| commit | 14bd1368cc34dc2b6d2c722fe6fa3e66c2566f67 (patch) | |
| tree | 0b6ed90857a84b01fad76eace673cbeed57b8266 /src | |
| parent | 415fc458be52095395236d80a7c402cb77b0b718 (diff) | |
| download | box64-14bd1368cc34dc2b6d2c722fe6fa3e66c2566f67.tar.gz box64-14bd1368cc34dc2b6d2c722fe6fa3e66c2566f67.zip | |
[RV64_DYNAREC] Added simple opcodes fusion as "native flags" (#2102)
* [RV64_DYNAREC] Added simple opcodes fusion as "native flags"
* Disable 16bit CMOV
* review
Diffstat (limited to 'src')
34 files changed, 1172 insertions, 798 deletions
diff --git a/src/core.c b/src/core.c index ebf88caf..007f578a 100644 --- a/src/core.c +++ b/src/core.c @@ -962,6 +962,9 @@ void LoadLogEnv() box64_dynarec_x87double = 1; box64_dynarec_div0 = 1; box64_dynarec_callret = 0; + #ifdef RV64 + box64_dynarec_nativeflags = 0; + #endif printf_log(LOG_INFO, "Dynarec will compare it's execution with the interpreter (super slow, only for testing)\n"); } } diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 8e0827f0..cbbeb93a 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -976,7 +976,7 @@ static void propagateNativeFlags(dynarec_arm_t* dyn, int start) } } -void updateNatveFlags(dynarec_native_t* dyn) +void updateNativeFlags(dynarec_native_t* dyn) { if(!box64_dynarec_nativeflags) return; @@ -1019,4 +1019,4 @@ int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst) if(((flags_before&flags_after)!=flags_after) || (flags_before&flags_x86)) return 1; return 0; -} \ No newline at end of file +} diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index b17a5bf1..c0f7dcd0 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -56,7 +56,7 @@ uint8_t flag2native(uint8_t flags); // mark a instruction as using/generating flags. 
return flag uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag, int before); // propage the use of nativeflags or not (done between step 0 and step 1) -void updateNatveFlags(dynarec_arm_t* dyn); +void updateNativeFlags(dynarec_arm_t* dyn); // raz arm speicifc state when an opcode is unused void rasNativeState(dynarec_arm_t* dyn, int ninst); // check if natives flags needs some tranform to/from x86 flags diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 3790a5db..a617009c 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -20,7 +20,8 @@ #define MAXBLOCK_SIZE ((1<<19)-200) #define RAZ_SPECIFIC(A, N) rasNativeState(A, N) -#define UPDATE_SPECIFICS(A) updateNatveFlags(A) +#define UPDATE_SPECIFICS(A) updateNativeFlags(A) +#define PREUPDATE_SPECIFICS(A) #elif defined(LA64) @@ -41,6 +42,7 @@ #define RAZ_SPECIFIC(A, N) #define UPDATE_SPECIFICS(A) +#define PREUPDATE_SPECIFICS(A) #elif defined(RV64) #define instruction_native_t instruction_rv64_t @@ -62,6 +64,7 @@ #define RAZ_SPECIFIC(A, N) #define UPDATE_SPECIFICS(A) +#define PREUPDATE_SPECIFICS(A) updateNativeFlags(A) #else #error Unsupported platform #endif diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 20ef8795..4f37d9b3 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -669,6 +669,8 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit helper.predecessor = (int*)alloca(alloc_size*sizeof(int)); fillPredecessors(&helper); + PREUPDATE_SPECIFICS(&helper); + int pos = helper.size; while (pos>=0) pos = updateNeed(&helper, pos, 0); diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h index 6133c0f1..20a22f7c 100644 --- a/src/dynarec/dynarec_private.h +++ b/src/dynarec/dynarec_private.h @@ -29,6 +29,9 @@ #define SF_NODF 16 #define SF_SET_NODF (SF_SET|SF_NODF) +#define NAT_FLAGS_FUSION 0 +#define NAT_FLAGS_NOFUSION 1 + typedef struct instruction_x64_s { 
uintptr_t addr; //address of the instruction int32_t size; // size of the instruction diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c index a343d892..eec5d003 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_0.c +++ b/src/dynarec/rv64/dynarec_rv64_00_0.c @@ -53,7 +53,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch(opcode) { case 0x00: INST_NAME("ADD Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(wb, gb, x1, ADD(wb, wb, x1)); GETEB(x1, 0); @@ -63,7 +63,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x01: INST_NAME("ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -72,7 +72,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x02: INST_NAME("ADD Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1)); GETEB(x1, 0); @@ -82,7 +82,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x03: INST_NAME("ADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -90,7 +90,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x04: INST_NAME("ADD AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_add8c(dyn, ninst, x1, u8, x3, x4, x5, x6); @@ -99,7 +99,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x05: INST_NAME("ADD EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_add32c(dyn, 
ninst, rex, xRAX, i64, x3, x4, x5, x6); break; @@ -124,7 +124,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x08: INST_NAME("OR Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(wb, gb, x1, OR(wb, wb, x1)); GETEB(x1, 0); @@ -134,7 +134,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x09: INST_NAME("OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -143,7 +143,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x0A: INST_NAME("OR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, OR(gb, gb, x1)); GETEB(x1, 0); @@ -153,7 +153,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x0B: INST_NAME("OR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -161,7 +161,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x0C: INST_NAME("OR AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_or8c(dyn, ninst, x1, u8, x3, x4, x5); @@ -170,7 +170,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x0D: INST_NAME("OR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_or32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; @@ -199,7 +199,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x10: INST_NAME("ADC Eb, Gb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, 
SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(wb, gb, x1, { ADD(wb, wb, x1); @@ -215,7 +215,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x11: INST_NAME("ADC Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -225,7 +225,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x12: INST_NAME("ADC Gb, Eb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, { ADD(gb, gb, x1); @@ -241,7 +241,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x13: INST_NAME("ADC Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -250,7 +250,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x14: INST_NAME("ADC AL, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_adc8c(dyn, ninst, x1, u8, x3, x4, x5, x6); @@ -260,7 +260,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x15: INST_NAME("ADC EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x1, i64); emit_adc32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); @@ -289,7 +289,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x18: INST_NAME("SBB Eb, Gb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(wb, gb, x1, { SUB(wb, wb, x1); @@ -305,7 +305,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int 
case 0x19: INST_NAME("SBB Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -315,7 +315,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x1A: INST_NAME("SBB Gb, Eb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, { SUB(gb, gb, x1); @@ -331,7 +331,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x1B: INST_NAME("SBB Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -340,7 +340,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x1C: INST_NAME("SBB AL, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_sbb8c(dyn, ninst, x1, u8, x3, x4, x5, x6); @@ -350,7 +350,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x1D: INST_NAME("SBB EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x2, i64); emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); @@ -378,7 +378,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x20: INST_NAME("AND Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -387,7 +387,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x21: INST_NAME("AND Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -396,7 +396,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t 
addr, uintptr_t ip, int break; case 0x22: INST_NAME("AND Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -405,7 +405,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x23: INST_NAME("AND Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -413,7 +413,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x24: INST_NAME("AND AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_and8c(dyn, ninst, x1, u8, x3, x4); @@ -422,13 +422,13 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x25: INST_NAME("AND EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; case 0x28: INST_NAME("SUB Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(wb, gb, x1, SUB(wb, wb, x1)); GETEB(x1, 0); @@ -438,7 +438,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x29: INST_NAME("SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -447,7 +447,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x2A: INST_NAME("SUB Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, SUB(gb, gb, x1)); GETEB(x1, 0); @@ -457,7 +457,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x2B: INST_NAME("SUB Gd, Ed"); - SETFLAGS(X_ALL, 
SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -465,7 +465,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x2C: INST_NAME("SUB AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_sub8c(dyn, ninst, x1, u8, x2, x3, x4, x5); @@ -474,13 +474,13 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x2D: INST_NAME("SUB EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5); break; case 0x30: INST_NAME("XOR Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(wb, gb, x1, XOR(wb, wb, x1)); GETEB(x1, 0); @@ -490,7 +490,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x31: INST_NAME("XOR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -501,7 +501,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x32: INST_NAME("XOR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, XOR(gb, gb, x1)); GETEB(x1, 0); @@ -511,7 +511,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x33: INST_NAME("XOR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -519,7 +519,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x34: INST_NAME("XOR AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, 
xRAX, 0xff); emit_xor8c(dyn, ninst, x1, u8, x3, x4); @@ -528,7 +528,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x35: INST_NAME("XOR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; @@ -537,7 +537,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x38: INST_NAME("CMP Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -545,7 +545,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x39: INST_NAME("CMP Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -553,7 +553,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x3A: INST_NAME("CMP Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -561,7 +561,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x3B: INST_NAME("CMP Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -569,7 +569,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x3C: INST_NAME("CMP AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); if(u8) { @@ -581,7 +581,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x3D: INST_NAME("CMP EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; if(i64) { MOV64xw(x2, i64); diff --git 
a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index cb938b58..77afd61d 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -62,7 +62,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x46: case 0x47: INST_NAME("INC Reg (32bits)"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); gd = xRAX + (opcode&7); emit_inc32(dyn, ninst, rex, gd, x1, x2, x3, x4); break; @@ -75,7 +75,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x4E: case 0x4F: INST_NAME("DEC Reg (32bits)"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); gd = xRAX + (opcode&7); emit_dec32(dyn, ninst, rex, gd, x1, x2, x3, x4); break; @@ -209,7 +209,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x69: INST_NAME("IMUL Gd, Ed, Id"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; GETED(4); @@ -249,7 +249,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; GETED(1); @@ -285,7 +285,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x6C: case 0x6D: INST_NAME(opcode == 0x6C ? "INSB" : "INSD"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); CALL(native_priv, -1); @@ -297,7 +297,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x6E: case 0x6F: INST_NAME(opcode == 0x6C ? 
"OUTSB" : "OUTSD"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); CALL(native_priv, -1); @@ -307,35 +307,43 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int *ok = 0; break; - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - i8 = F8S; \ - BARRIER(BARRIER_MAYBE); \ - JUMP(addr+i8, 1); \ - GETFLAGS; \ - if(dyn->insts[ninst].x64.jmp_insts==-1 || \ - CHECK_CACHE()) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst].epilog-(dyn->native_size); \ - B##NO##_safe(x1, i32); \ - if(dyn->insts[ninst].x64.jmp_insts==-1) { \ - if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ - B##YES##_safe(x1, i32); \ - } - - GOCOND(0x70, "J", "ib"); +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \ + READFLAGS_FUSION(F, 1); \ + i8 = F8S; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(addr + i8, 1); \ + if (!dyn->insts[ninst].nat_flags_fusion) { \ + GETFLAGS; \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATNO, i32); \ + } else { \ + B##NO##_safe(x1, i32); \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ + jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + i32 = 
dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATYES, i32); \ + } else { \ + B##YES##_safe(x1, i32); \ + } \ + } + GOCOND(0x70, "J", "ib"); +#undef GO - #undef GO default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index c495f935..82552d29 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -55,7 +55,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch((nextop>>3)&7) { case 0: // ADD INST_NAME("ADD Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_add8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -63,7 +63,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 1: // OR INST_NAME("OR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -72,7 +72,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 2: // ADC INST_NAME("ADC Eb, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -81,7 +81,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 3: // SBB INST_NAME("SBB Eb, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -89,7 +89,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 4: // AND 
INST_NAME("AND Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_and8c(dyn, ninst, x1, u8, x2, x4); @@ -97,7 +97,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 5: // SUB INST_NAME("SUB Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -105,7 +105,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 6: // XOR INST_NAME("XOR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_xor8c(dyn, ninst, x1, u8, x2, x4); @@ -113,7 +113,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 7: // CMP INST_NAME("CMP Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; if(u8) { @@ -133,7 +133,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch((nextop>>3)&7) { case 0: // ADD if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); @@ -141,7 +141,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 1: // OR if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -150,7 +150,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 2: // ADC 
if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; MOV64xw(x5, i64); @@ -160,7 +160,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 3: // SBB if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; MOV64xw(x5, i64); @@ -169,7 +169,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 4: // AND if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -177,7 +177,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 5: // SUB if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); @@ -185,7 +185,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 6: // XOR if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -193,7 +193,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 7: // 
CMP if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; if(i64) { @@ -211,7 +211,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x84: INST_NAME("TEST Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop=F8; GETEB(x1, 0); GETGB(x2); @@ -219,7 +219,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x85: INST_NAME("TEST Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop=F8; GETGD; GETED(0); @@ -515,7 +515,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x9D: INST_NAME("POPF"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); POP1z(xFlags); FLAGS_ADJUST_FROM11(xFlags, xFlags, x2); MOV32w(x1, 0x3F7FD7); @@ -531,7 +531,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x9E: INST_NAME("SAHF"); - SETFLAGS(X_CF | X_PF | X_AF | X_ZF | X_SF, SF_SUBSET); + SETFLAGS(X_CF | X_PF | X_AF | X_ZF | X_SF, SF_SUBSET, NAT_FLAGS_NOFUSION); ADDI(x1, xZR, ~0b11010101); AND(xFlags, xFlags, x1); NOT(x1, x1); @@ -645,7 +645,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 2: if(rep==1) {INST_NAME("REPNZ CMPSB");} else {INST_NAME("REPZ CMPSB");} MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); ANDI(x1, xFlags, 1<<F_DF); BNEZ_MARK2(x1); @@ -671,7 +671,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; default: INST_NAME("CMPSB"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETDIR(x3, x1, 1); LBU(x1, 
xRSI, 0); LBU(x2, xRDI, 0); @@ -687,7 +687,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 2: if (rep == 1) { INST_NAME("REPNZ CMPSD"); } else { INST_NAME("REPZ CMPSD"); } MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); ANDI(x1, xFlags, 1 << F_DF); BNEZ_MARK2(x1); @@ -713,7 +713,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; default: INST_NAME("CMPSD"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETDIR(x3, x1, rex.w ? 8 : 4); LDxw(x1, xRSI, 0); LDxw(x2, xRDI, 0); @@ -725,7 +725,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xA8: INST_NAME("TEST AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); ANDI(x1, xRAX, 0xff); u8 = F8; MOV32w(x2, u8); @@ -733,7 +733,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xA9: INST_NAME("TEST EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x2, i64); emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); @@ -816,7 +816,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 2: if (rep==1) {INST_NAME("REPNZ SCASB");} else {INST_NAME("REPZ SCASB");} MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); ANDI(x1, xRAX, 0xff); ANDI(x2, xFlags, 1<<F_DF); @@ -839,7 +839,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; default: INST_NAME("SCASB"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETDIR(x3, x1, 1); ANDI(x1, xRAX, 0xff); LBU(x2, xRDI, 0); @@ -854,7 +854,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, 
int case 2: if (rep==1) {INST_NAME("REPNZ SCASD");} else {INST_NAME("REPZ SCASD");} MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); if (rex.w) {MV(x1, xRAX);} else {AND(x1, xRAX, xMASK);} ANDI(x2, xFlags, 1<<F_DF); @@ -877,7 +877,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; default: INST_NAME("SCASD"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETDIR(x3, x1, rex.w?8:4); LDxw(x2, xRDI, 0); ADD(xRDI, xRDI, x3); diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index 38ef9148..7c5c7a1f 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -59,7 +59,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0: INST_NAME("ROL Eb, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -69,7 +69,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 1: INST_NAME("ROR Eb, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -80,7 +80,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCL Eb, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -91,7 +91,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCR Eb, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 1); u8 = F8; 
MOV32w(x2, u8); @@ -103,7 +103,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SHL Eb, Ib"); u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f; if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = (F8) & 0x1f; emit_shl8c(dyn, ninst, ed, u8, x4, x5, x6); @@ -117,7 +117,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SHR Eb, Ib"); u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f; if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = (F8) & 0x1f; emit_shr8c(dyn, ninst, ed, u8, x4, x5, x6); @@ -131,7 +131,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SAR Eb, Ib"); u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f; if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETSEB(x1, 1); u8 = (F8) & 0x1f; emit_sar8c(dyn, ninst, ed, u8, x4, x5, x6); @@ -153,7 +153,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(1); F8; emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -173,7 +173,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. 
if (u8) { - SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(1); F8; emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -192,7 +192,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCL Ed, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); u8 = (F8)&(rex.w?0x3f:0x1f); MOV32w(x2, u8); GETEDW(x4, x1, 0); @@ -204,7 +204,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCR Ed, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); u8 = (F8)&(rex.w?0x3f:0x1f); MOV32w(x2, u8); GETEDW(x4, x1, 0); @@ -218,7 +218,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETED(1); F8; emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4, x5); @@ -238,7 +238,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETED(1); F8; emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -258,7 +258,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. 
if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETED(1); F8; emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -411,7 +411,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xCC: - SETFLAGS(X_ALL, SF_SET_NODF); // Hack, set all flags (to an unknown state...) + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack, set all flags (to an unknown state...) SKIPTEST(x1); if(PK(0)=='S' && PK(1)=='C') { addr+=2; @@ -507,7 +507,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int jump_to_epilog(dyn, 0, xRIP, ninst); } else if (u8==0x03) { INST_NAME("INT 3"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(addr); STORE_XEMU_CALL(x3); CALL(native_int3, -1); @@ -517,7 +517,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int *ok = 0; } else { INST_NAME("INT n"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); // priviledged instruction, IP not updated STORE_XEMU_CALL(x3); CALL(native_priv, -1); @@ -529,7 +529,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xCF: INST_NAME("IRET"); - SETFLAGS(X_ALL, SF_SET_NODF); // Not a hack, EFLAGS are restored + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Not a hack, EFLAGS are restored BARRIER(BARRIER_FLOAT); iret_to_epilog(dyn, ninst, rex.w); *need_epilog = 0; @@ -550,7 +550,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ANDI(x2, xRCX, 0x1f); } MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); CALL_(rol8, ed, x3); EBBACK(x5, 0); break; @@ 
-565,7 +565,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ANDI(x2, xRCX, 0x1f); } MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); CALL_(ror8, ed, x3); EBBACK(x5, 0); break; @@ -581,7 +581,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); CALL_(rcl8, ed, x3); EBBACK(x5, 0); break; @@ -597,7 +597,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); CALL_(rcr8, ed, x3); EBBACK(x5, 0); break; @@ -613,7 +613,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); emit_shl8(dyn, ninst, x1, x2, x5, x4, x6); @@ -630,7 +630,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ANDI(x2, xRCX, 0x1F); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); emit_shr8(dyn, ninst, x1, x2, x5, x4, x6); @@ -647,7 +647,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); 
emit_sar8(dyn, ninst, x1, x2, x5, x4, x6); @@ -662,7 +662,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch((nextop>>3)&7) { case 0: INST_NAME("ROL Ed, 1"); - SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; @@ -670,7 +670,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 1: INST_NAME("ROR Ed, 1"); - SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_ror32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; @@ -680,7 +680,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCL Ed, 1"); MESSAGE("LOG_DUMP", "Need optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); MOV32w(x2, 1); GETEDW(x4, x1, 0); CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4); @@ -691,7 +691,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCR Ed, 1"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); MOV32w(x2, 1); GETEDW(x4, x1, 0); CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); @@ -701,21 +701,21 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 4: case 6: INST_NAME("SHL Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED(0); emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4, x5); WBACK; break; case 5: INST_NAME("SHR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED(0); emit_shr32c(dyn, ninst, rex, ed, 1, x3, 
x4); WBACK; break; case 7: INST_NAME("SAR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED(0); emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; @@ -730,7 +730,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch((nextop>>3)&7) { case 0: INST_NAME("ROL Ed, CL"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); + SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); GETED(0); ANDI(x6, xRCX, rex.w ? 0x3f : 0x1f); emit_rol32(dyn, ninst, rex, ed, x6, x3, x4); @@ -739,7 +739,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 1: INST_NAME("ROR Ed, CL"); - SETFLAGS(X_OF|X_CF, SF_SUBSET); + SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); GETED(0); ANDI(x6, xRCX, rex.w ? 0x3f : 0x1f); emit_ror32(dyn, ninst, rex, ed, x6, x3, x4); @@ -750,7 +750,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCL Ed, CL"); MESSAGE("LOG_DUMP", "Need optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); ANDI(x2, xRCX, rex.w?0x3f:0x1f); GETEDW(x4, x1, 0); CALL_(rex.w ? 
((void*)rcl64) : ((void*)rcl32), ed, x4); @@ -761,7 +761,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("RCR Ed, CL"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); ANDI(x2, xRCX, rex.w?0x3f:0x1f); GETEDW(x4, x1, 0); CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); @@ -771,7 +771,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 4: case 6: INST_NAME("SHL Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined ANDI(x3, xRCX, rex.w?0x3f:0x1f); GETED(0); if(!rex.w && MODREG) {ZEROUP(ed);} @@ -781,7 +781,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 5: INST_NAME("SHR Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined ANDI(x3, xRCX, rex.w?0x3f:0x1f); GETED(0); if(!rex.w && MODREG) {ZEROUP(ed);} @@ -791,7 +791,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 7: INST_NAME("SAR Ed, CL"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); ANDI(x3, xRCX, rex.w?0x3f:0x1f); GETED(0); if(!rex.w && MODREG) {ZEROUP(ed);} @@ -909,7 +909,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int #endif switch(tmp) { case 3: - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags to "dont'care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags to "dont'care" state SKIPTEST(x1); BARRIER(BARRIER_FULL); if(dyn->last_ip && (addr-dyn->last_ip<0x1000)) { @@ -967,7 +967,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if((box64_dynarec_safeflags>1) || (ninst && 
dyn->insts[ninst-1].x64.set_flags)) { READFLAGS(X_PEND); // that's suspicious } else { - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags to "dont'care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags to "dont'care" state } // regular call /*if(box64_dynarec_callret && box64_dynarec_bigblock>1) { @@ -1065,7 +1065,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("OUT DX, AL"); else INST_NAME("OUT DX, EAX"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(xRIP); CALL(native_priv, -1); @@ -1080,7 +1080,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xF5: INST_NAME("CMC"); READFLAGS(X_CF); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); XORI(xFlags, xFlags, 1<<F_CF); break; case 0xF6: @@ -1089,7 +1089,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0: case 1: INST_NAME("TEST Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -1103,14 +1103,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 3: INST_NAME("NEG Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 0); emit_neg8(dyn, ninst, x1, x2, x4, x5); EBBACK(x5, 0); break; case 4: INST_NAME("MUL AL, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETEB(x1, 0); ANDI(x2, xRAX, 0xff); MULW(x1, x2, x1); @@ -1123,7 +1123,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 5: INST_NAME("IMUL AL, Eb"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETSEB(x1, 0); SLLI(x2, xRAX, 56); 
SRAI(x2, x2, 56); @@ -1138,7 +1138,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 6: INST_NAME("DIV Eb"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 0); CALL(div8, -1); break; @@ -1146,7 +1146,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("IDIV Eb"); SKIPTEST(x1); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 0); CALL(idiv8, -1); break; @@ -1158,7 +1158,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0: case 1: INST_NAME("TEST Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(4); i64 = F32S; emit_test32c(dyn, ninst, rex, ed, i64, x3, x4, x5); @@ -1173,14 +1173,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 3: INST_NAME("NEG Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_neg32(dyn, ninst, rex, ed, x3, x4, x5, x6); WBACK; break; case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); CLEAR_FLAGS(); SET_DFNONE(); GETED(0); @@ -1213,7 +1213,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 5: INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); CLEAR_FLAGS(); SET_DFNONE(); GETSED(0); @@ -1244,7 +1244,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 6: INST_NAME("DIV Ed"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); if(!rex.w) { GETED(0); @@ -1326,7 +1326,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 7: INST_NAME("IDIV Ed"); 
SKIPTEST(x1); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE() if(!rex.w) { GETSED(0); @@ -1404,13 +1404,13 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xF8: INST_NAME("CLC"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); ANDI(xFlags, xFlags, ~(1 << F_CF)); break; case 0xF9: INST_NAME("STC"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); ORI(xFlags, xFlags, 1 << F_CF); break; @@ -1427,14 +1427,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch((nextop>>3)&7) { case 0: INST_NAME("INC Eb"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 0); emit_inc8(dyn, ninst, ed, x2, x4, x5); EBBACK(x5, 0); break; case 1: INST_NAME("DEC Eb"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 0); emit_dec8(dyn, ninst, ed, x2, x4, x5); EBBACK(x5, 0); @@ -1448,14 +1448,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch((nextop>>3)&7) { case 0: // INC Ed INST_NAME("INC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_inc32(dyn, ninst, rex, ed, x3, x4, x5, x6); WBACK; break; case 1: //DEC Ed INST_NAME("DEC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_dec32(dyn, ninst, rex, ed, x3, x4, x5, x6); WBACK; @@ -1468,7 +1468,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int { READFLAGS(X_PEND); // that's suspicious } else { - SETFLAGS(X_ALL, SF_SET_NODF); //Hack to put flag in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to put flag in "don't care" state } GETEDz(0); 
if(box64_dynarec_callret && box64_dynarec_bigblock>1) { diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index c583987b..7829a414 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -130,7 +130,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x09: INST_NAME("WBINVD"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); CALL(native_ud, -1); @@ -142,7 +142,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0B: INST_NAME("UD2"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(x3); CALL(native_ud, -1); @@ -380,7 +380,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("UCOMISS Gx, Ex"); } - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGXSS(d0); @@ -902,18 +902,28 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; -#define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \ + READFLAGS_FUSION(F, 0); \ + if (!dyn->insts[ninst].nat_flags_fusion) { \ + GETFLAGS; \ + } \ nextop = F8; \ GETGD; \ if (MODREG) { \ ed = xRAX + (nextop & 7) + (rex.b << 3); \ - B##NO(x1, 8); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP(NATNO, 8); \ + } else { \ + B##NO(x1, 8); \ + } \ MV(gd, ed); \ } else { \ addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ - B##NO(x1, 8); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP(NATNO, 8); \ + } else { \ + B##NO(x1, 8); \ + } \ 
LDxw(gd, ed, fixedaddress); \ } \ if (!rex.w) ZEROUP(gd); @@ -1663,67 +1673,77 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LD(x3, gback, gdoffset + 0); SD(x3, wback, fixedaddress); break; - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - i32_ = F32S; \ - if(rex.is32bits) \ - j64 = (uint32_t)(addr+i32_); \ - else \ - j64 = addr+i32_; \ - BARRIER(BARRIER_MAYBE); \ - JUMP(j64, 1); \ - GETFLAGS; \ - if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ - B##NO##_safe(x1, i32); \ - if (dyn->insts[ninst].x64.jmp_insts == -1) { \ - if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, j64, 0, ninst, rex.is32bits); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - B##YES##_safe(x1, i32); \ - } +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \ + READFLAGS_FUSION(F, 1); \ + i32_ = F32S; \ + if (rex.is32bits) \ + j64 = (uint32_t)(addr + i32_); \ + else \ + j64 = addr + i32_; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(j64, 1); \ + if (!dyn->insts[ninst].nat_flags_fusion) { \ + GETFLAGS; \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATNO, i32); \ + } else { \ + B##NO##_safe(x1, i32); \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ + jump_to_next(dyn, j64, 0, ninst, rex.is32bits); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + 
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATYES, i32); \ + } else { \ + B##YES##_safe(x1, i32); \ + } \ + } GOCOND(0x80, "J", "Id"); - #undef GO +#undef GO - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop = F8; \ - S##YES(x3, x1); \ - if (MODREG) { \ - if (rex.rex) { \ - eb1 = xRAX + (nextop & 7) + (rex.b << 3); \ - eb2 = 0; \ - } else { \ - ed = (nextop & 7); \ - eb2 = (ed >> 2) * 8; \ - eb1 = xRAX + (ed & 3); \ - } \ - if (eb2) { \ - LUI(x1, 0xffff0); \ - ORI(x1, x1, 0xff); \ - AND(eb1, eb1, x1); \ - SLLI(x3, x3, 8); \ - } else { \ - ANDI(eb1, eb1, 0xf00); \ - } \ - OR(eb1, eb1, x3); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); \ - SB(x3, ed, fixedaddress); \ - SMWRITE(); \ - } +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop = F8; \ + S##YES(x3, x1); \ + if (MODREG) { \ + if (rex.rex) { \ + eb1 = xRAX + (nextop & 7) + (rex.b << 3); \ + eb2 = 0; \ + } else { \ + ed = (nextop & 7); \ + eb2 = (ed >> 2) * 8; \ + eb1 = xRAX + (ed & 3); \ + } \ + if (eb2) { \ + LUI(x1, 0xffff0); \ + ORI(x1, x1, 0xff); \ + AND(eb1, eb1, x1); \ + SLLI(x3, x3, 8); \ + } else { \ + ANDI(eb1, eb1, 0xf00); \ + } \ + OR(eb1, eb1, x3); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); \ + SB(x3, ed, fixedaddress); \ + SMWRITE(); \ + } GOCOND(0x90, "SET", "Eb"); - #undef GO +#undef GO case 0xA2: INST_NAME("CPUID"); @@ -1736,7 +1756,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xA3: INST_NAME("BT Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ 
-1758,7 +1778,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; INST_NAME("SHLD Ed, Gd, Ib"); if (geted_ib(dyn, addr, ninst, nextop)) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(1); GETGD; u8 = F8; @@ -1772,7 +1792,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xA5: nextop = F8; INST_NAME("SHLD Ed, Gd, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); GETGD; @@ -1785,7 +1805,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xAB: INST_NAME("BTS Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ -1819,7 +1839,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; INST_NAME("SHRD Ed, Gd, Ib"); if (geted_ib(dyn, addr, ninst, nextop)) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(1); GETGD; u8 = F8; @@ -1834,7 +1854,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xAD: nextop = F8; INST_NAME("SHRD Ed, Gd, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); GETGD; @@ -1933,7 +1953,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xAF: // TODO: Refine this INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; GETED(0); @@ -1966,7 +1986,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xB3: INST_NAME("BTR Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, 
NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ -2040,7 +2060,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 4: INST_NAME("BT Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -2051,7 +2071,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("BTS Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -2079,7 +2099,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: INST_NAME("BTR Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -2108,7 +2128,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: INST_NAME("BTC Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -2135,7 +2155,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xBB: INST_NAME("BTC Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ -2167,7 +2187,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xBC: INST_NAME("BSF Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); @@ -2186,7 +2206,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xBD: INST_NAME("BSR Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); @@ -2245,7 +2265,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xC0: 
INST_NAME("XADD Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -2260,7 +2280,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xC1: INST_NAME("XADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index 1b8629e3..db5d074e 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -60,7 +60,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch(opcode) { case 0x03: INST_NAME("ADD Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); grab_segdata(dyn, addr, ninst, x4, seg); nextop = F8; GETGD; @@ -135,7 +135,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2B: INST_NAME("SUB Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); grab_segdata(dyn, addr, ninst, x4, seg); nextop = F8; GETGD; @@ -145,7 +145,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x33: INST_NAME("XOR Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); grab_segdata(dyn, addr, ninst, x4, seg); nextop = F8; GETGD; @@ -162,7 +162,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: // ADD INST_NAME("ADD Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_add8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -171,7 +171,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 1: // OR INST_NAME("OR Eb, Ib"); 
grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -181,7 +181,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("ADC Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -191,7 +191,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SBB Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -200,7 +200,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 4: // AND INST_NAME("AND Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_and8c(dyn, ninst, x1, u8, x2, x4); @@ -209,7 +209,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 5: // SUB INST_NAME("SUB Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -218,7 +218,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 6: // XOR INST_NAME("XOR Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_xor8c(dyn, ninst, x1, u8, x2, x4); @@ -227,7 +227,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 
7: // CMP INST_NAME("CMP Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; if(u8) { @@ -248,7 +248,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch((nextop>>3)&7) { case 0: // ADD if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x9); @@ -256,7 +256,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 1: // OR if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -265,7 +265,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 2: // ADC if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; MOV64xw(x5, i64); @@ -277,7 +277,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 3: // SBB if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; MOV64xw(x5, i64); @@ -286,7 +286,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 4: // AND if(opcode==0x81) {INST_NAME("AND Ed, 
Id");} else {INST_NAME("AND Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -294,7 +294,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: // SUB if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x9); @@ -302,7 +302,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: // XOR if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -310,7 +310,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: // CMP if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode==0x81)?4:1, x5); if(opcode==0x81) i64 = F32S; else i64 = F8S; if(i64) { diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index fcfae274..885d8c14 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -61,7 +61,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch(opcode) { case 0x01: INST_NAME("ADD Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(ed, gd, x1, ADD(ed, ed, x1)); GETGW(x2); @@ -71,7 +71,7 
@@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x03: INST_NAME("ADD Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(gd, ed, x1, ADD(gd, gd, x1)); GETGW(x1); @@ -81,7 +81,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x05: INST_NAME("ADD AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; ZEXTH(x1 , xRAX); MOV32w(x2, i32); @@ -101,7 +101,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x09: INST_NAME("OR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(ed, gd, x1, OR(ed, ed, x1)); GETGW(x2); @@ -111,7 +111,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0B: INST_NAME("OR Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(gd, ed, x1, OR(gd, gd, x1)); GETGW(x1); @@ -121,7 +121,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0D: INST_NAME("OR AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; ZEXTH(x1, xRAX); MOV32w(x2, i32); @@ -143,7 +143,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x11: INST_NAME("ADC Ew, Gw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(ed, gd, x1, { ADD(ed, ed, x1); @@ -159,7 +159,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x13: INST_NAME("ADC Gw, Ew"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, 
NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(gd, ed, x1, { ADD(gd, gd, x1); @@ -175,7 +175,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x15: INST_NAME("ADC AX, Iw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u64 = F16; ZEXTH(x1, xRAX); MOV64x(x2, u64); @@ -185,7 +185,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x19: INST_NAME("SBB Ew, Gw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(ed, gd, x1, { SUB(ed, ed, x1); @@ -201,7 +201,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1B: INST_NAME("SBB Gw, Ew"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(gd, ed, x1, { SUB(gd, gd, x1); @@ -217,7 +217,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1D: INST_NAME("SBB AX, Iw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); ZEXTH(x1, xRAX); u64 = F16; MOV64x(x2, u64); @@ -237,7 +237,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x21: INST_NAME("AND Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -246,7 +246,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x23: INST_NAME("AND Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -255,7 +255,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x25: INST_NAME("AND AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + 
SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; ZEXTH(x1, xRAX); MOV32w(x2, i32); @@ -264,7 +264,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x29: INST_NAME("SUB Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(ed, gd, x1, SUB(ed, ed, x1)); GETGW(x1); @@ -274,7 +274,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2B: INST_NAME("SUB Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_16BIT_OPERATION(gd, ed, x1, SUB(gd, gd, x1)); GETGW(x1); @@ -284,7 +284,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2D: INST_NAME("SUB AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; ZEXTH(x1, xRAX); MOV32w(x2, i32); @@ -293,7 +293,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x31: INST_NAME("XOR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; if (MODREG && !dyn->insts[ninst].x64.gen_flags) { gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); @@ -312,7 +312,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x33: INST_NAME("XOR Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; if (MODREG && !dyn->insts[ninst].x64.gen_flags) { gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); @@ -331,7 +331,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x35: INST_NAME("XOR AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; ZEXTH(x1, xRAX); MOV32w(x2, i32); @@ -340,7 +340,7 @@ uintptr_t 
dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x39: INST_NAME("CMP Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -348,7 +348,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3B: INST_NAME("CMP Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -356,7 +356,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3D: INST_NAME("CMP AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; ZEXTH(x1, xRAX); if(i32) { @@ -375,7 +375,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x46: case 0x47: INST_NAME("INC Reg16 (32bits)"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); gd = xRAX + (opcode&7); ZEXTH(x1, gd); emit_inc16(dyn, ninst, x1, x2, x3, x4, x5); @@ -390,7 +390,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x4E: case 0x4F: INST_NAME("DEC Reg16 (32bits)"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); gd = xRAX + (opcode&7); ZEXTH(x1, gd); emit_dec16(dyn, ninst, x1, x2, x3, x4, x5); @@ -437,7 +437,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("IMUL Gw,Ew,Ib"); } - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETSEW(x1, (opcode==0x69)?2:1); if(opcode==0x69) i32 = F16S; else i32 = F8S; @@ -474,7 +474,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch((nextop>>3)&7) { case 0: // ADD if(opcode==0x81) {INST_NAME("ADD Ew, Iw");} else {INST_NAME("ADD Ew, 
Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -483,7 +483,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 1: // OR if(opcode==0x81) {INST_NAME("OR Ew, Iw");} else {INST_NAME("OR Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -493,7 +493,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 2: // ADC if(opcode==0x81) {INST_NAME("ADC Ew, Iw");} else {INST_NAME("ADC Ew, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -503,7 +503,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 3: // SBB if(opcode==0x81) {INST_NAME("SBB Ew, Iw");} else {INST_NAME("SBB Ew, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -512,7 +512,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 4: // AND if(opcode==0x81) {INST_NAME("AND Ew, Iw");} else {INST_NAME("AND Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -521,7 +521,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: // SUB if(opcode==0x81) {INST_NAME("SUB Ew, Iw");} else 
{INST_NAME("SUB Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -530,7 +530,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: // XOR if(opcode==0x81) {INST_NAME("XOR Ew, Iw");} else {INST_NAME("XOR Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); @@ -539,7 +539,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: // CMP if(opcode==0x81) {INST_NAME("CMP Ew, Iw");} else {INST_NAME("CMP Ew, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; if(u64) { @@ -554,7 +554,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x85: INST_NAME("TEST Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEW(x1, 0); GETGW(x2); @@ -701,7 +701,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x9D: INST_NAME("POPF"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); POP1_16(x1); FLAGS_ADJUST_FROM11(x1, x1, x2); LUI(x2, 0xffff0); @@ -806,7 +806,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 2: if (rep == 1) { INST_NAME("REPNZ CMPSW"); } else { INST_NAME("REPZ CMPSW"); } MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); CBZ_NEXT(xRCX); ANDI(x1, xFlags, 1 << F_DF); BNEZ_MARK2(x1); @@ -832,7 +832,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, 
uintptr_t ip, int ni break; default: INST_NAME("CMPSW"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETDIR(x3, x1, 2); LHU(x1, xRSI, 0); LHU(x2, xRDI, 0); @@ -844,7 +844,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xA9: INST_NAME("TEST AX,Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u16 = F16; MOV32w(x2, u16); SLLIW(x1, xRAX, 16); @@ -893,7 +893,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 2: if(rep==1) {INST_NAME("REPNZ SCASW");} else {INST_NAME("REPZ SCASW");} MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); CBZ_NEXT(xRCX); GETDIR(x3, x1, rex.w?8:2); if (rex.w) { @@ -919,7 +919,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; default: INST_NAME("SCASW"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETDIR(x3, x1, rex.w?8:2); if (rex.w) { LD(x2, xRDI, 0); @@ -956,7 +956,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: INST_NAME("ROL Ew, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -966,7 +966,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 1: INST_NAME("ROR Ew, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -977,7 +977,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("RCL Ew, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 
1); u8 = F8; MOV32w(x2, u8); @@ -988,7 +988,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("RCR Ew, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -999,7 +999,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 6: INST_NAME("SHL Ew, Ib"); if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETEW(x1, 0); u8 = (F8)&0x1f; emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6); @@ -1012,7 +1012,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 5: INST_NAME("SHR Ew, Ib"); if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETEW(x1, 0); u8 = (F8)&0x1f; emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6); @@ -1025,7 +1025,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 7: INST_NAME("SAR Ew, Ib"); if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETSEW(x1, 0); u8 = (F8)&0x1f; emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6); @@ -1067,7 +1067,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); } MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); CALL_(rol16, x1, x3); EWBACK; @@ -1081,7 +1081,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, 
xRCX, 0x1f); } MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); CALL_(ror16, x1, x3); EWBACK; @@ -1096,7 +1096,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } MESSAGE("LOG_DUMP", "Need optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); CALL_(rcl16, x1, x3); EWBACK; @@ -1111,7 +1111,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } MESSAGE("LOG_DUMP", "Need optimization\n"); READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); CALL_(rcr16, x1, x3); EWBACK; @@ -1125,7 +1125,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); GETEW(x1, 0); @@ -1142,7 +1142,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); GETEW(x1, 0); @@ -1158,7 +1158,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if(box64_dynarec_safeflags>1) MAYSETFLAGS(); GETSEW(x1, 0); @@ -1179,7 +1179,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: case 1: 
INST_NAME("TEST Ew, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 2); u16 = F16; MOV32w(x2, u16); @@ -1194,14 +1194,14 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 3: INST_NAME("NEG Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); emit_neg16(dyn, ninst, ed, x2, x4, x5); EWBACK; break; case 4: INST_NAME("MUL AX, Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETEW(x1, 0); ZEXTH(x2, xRAX); MULW(x1, x2, x1); @@ -1215,7 +1215,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("IMUL AX, Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETSEW(x1, 0); SLLI(x2, xRAX, 16); SRAIW(x2, x2, 16); @@ -1230,7 +1230,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: INST_NAME("DIV Ew"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETEW(x1, 0); ZEXTH(x2, xRAX); @@ -1255,7 +1255,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 7: INST_NAME("IDIV Ew"); NOTEST(x1); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETSEW(x1, 0); if(box64_dynarec_div0) { @@ -1287,14 +1287,14 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch((nextop>>3)&7) { case 0: INST_NAME("INC Ew"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); emit_inc16(dyn, ninst, x1, x2, x4, x5, x6); EWBACK; break; case 1: INST_NAME("DEC Ew"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); emit_dec16(dyn, ninst, x1, x2, x4, x5, x6); EWBACK; diff --git 
a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index c9b40dc2..27021b3f 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -238,7 +238,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { INST_NAME("UCOMISD Gx, Ex"); } - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGXSD(d0); @@ -276,28 +276,30 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x3A: // these are some more SSSE3+ opcodes addr = dynarec64_660F38(dyn, addr, opcode, ip, ninst, rex, ok, need_epilog); break; - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop = F8; \ - GETGD; \ - if (MODREG) { \ - ed = xRAX + (nextop & 7) + (rex.b << 3); \ - ZEXTH(x4, ed); \ - ed = x4; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ - LHU(x4, ed, fixedaddress); \ - ed = x4; \ - } \ - B##NO(x1, 4 + 3 * 4); \ - LUI(x3, 0xffff0); \ - AND(gd, gd, x3); \ - OR(gd, gd, ed); + +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop = F8; \ + GETGD; \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + ZEXTH(x4, ed); \ + ed = x4; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ + LHU(x4, ed, fixedaddress); \ + ed = x4; \ + } \ + B##NO(x1, 4 + 3 * 4); \ + LUI(x3, 0xffff0); \ + AND(gd, gd, x3); \ + OR(gd, gd, ed); GOCOND(0x40, "CMOV", "Gw, Ew"); - #undef GO +#undef GO + case 0x50: INST_NAME("PMOVMSKD Gd, Ex"); nextop = F8; @@ -1158,7 +1160,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xA3: INST_NAME("BT Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETEW(x1, 0); @@ -1171,7 +1173,7 @@ uintptr_t 
dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xA4: nextop = F8; INST_NAME("SHLD Ew, Gw, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); GETGW(x2); u8 = F8; @@ -1180,7 +1182,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xAB: INST_NAME("BTS Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETEW(x1, 0); @@ -1197,7 +1199,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xAC: nextop = F8; INST_NAME("SHRD Ew, Gw, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); GETGW(x2); u8 = F8; @@ -1206,7 +1208,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xAF: INST_NAME("IMUL Gw,Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETSEW(x1, 0); GETSGW(x2); @@ -1218,7 +1220,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xB3: INST_NAME("BTR Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETEW(x1, 0); @@ -1265,7 +1267,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int switch ((nextop >> 3) & 7) { case 4: INST_NAME("BT Ew, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -1276,7 +1278,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 5: INST_NAME("BTS Ew, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETEW(x1, 1); u8 = F8; @@ -1300,7 +1302,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 6: INST_NAME("BTR Ed, Ib"); - 
SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETEW(x1, 1); u8 = F8; @@ -1324,7 +1326,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 7: INST_NAME("BTC Ew, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETEW(x1, 1); u8 = F8; @@ -1346,7 +1348,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xBB: INST_NAME("BTC Ew, Gw"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ -1387,7 +1389,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xBC: INST_NAME("BSF Gw, Ew"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETEW(x5, 0); @@ -1403,7 +1405,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xBD: INST_NAME("BSR Gw, Ew"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETEW(x5, 0); @@ -1447,7 +1449,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xC1: INST_NAME("XADD Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index 15464d72..5ac15061 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -341,7 +341,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, case 0x17: INST_NAME("PTEST Gx, Ex"); nextop = F8; - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); GETGX(); GETEX(x2, 0, 8); CLEAR_FLAGS(); @@ -733,7 +733,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t 
opcode, break; case 0x61: INST_NAME("PCMPESTRI Gx, Ex, Ib"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); nextop = F8; GETG; sse_reflect_reg(dyn, ninst, x6, gd); @@ -1222,7 +1222,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, break; case 0x63: INST_NAME("PCMPISTRI Gx, Ex, Ib"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); nextop = F8; GETG; sse_reflect_reg(dyn, ninst, x6, gd); diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index a07c2d25..294009d0 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -421,7 +421,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x17: INST_NAME("PTEST Gx, Ex"); nextop = F8; - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(q0, 0, VECTOR_SEW64); GETEX_vector(q1, 0, 0, VECTOR_SEW64); diff --git a/src/dynarec/rv64/dynarec_rv64_66f0.c b/src/dynarec/rv64/dynarec_rv64_66f0.c index 17cd0c00..64fad3c0 100644 --- a/src/dynarec/rv64/dynarec_rv64_66f0.c +++ b/src/dynarec/rv64/dynarec_rv64_66f0.c @@ -49,72 +49,7 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETREX(); - switch(opcode) { - case 0x11: - INST_NAME("LOCK ADC Ew, Gw"); - READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGW(x1); - SMDMB(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x6, 0xFFFF); - AND(x2, ed, x6); - emit_adc16(dyn, ninst, x1, x2, x3, x4, x5); - if (rv64_zbb) { - ANDN(ed, ed, x6); - } else { - NOT(x6, x6); - AND(ed, ed, x6); - } - OR(ed, ed, x2); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, LOCK_LOCK, 0, 0); - ANDI(x3, wback, 0b10); - MOV32w(x4, 0xffff); // x4 = mask - BNEZ_MARK(x3); - // lower 16bits - MARKLOCK; - 
LR_W(x5, wback, 1, 1); - AND(x6, x5, x4); // x6 = Ed.h[0] - SRLIW(x5, x5, 16); - SLLIW(x5, x5, 16); // x5 = clear Ed.h[0] - ADDW(x2, x1, x6); // x2 = Gw + Ew - ANDI(x9, xFlags, 1 << F_CF); - ADDW(x2, x2, x9); // x2 = adc - AND(x2, x2, x4); - OR(x5, x5, x2); - SC_W(x2, x5, wback, 1, 1); - BNEZ_MARKLOCK(x2); - B_MARK3_nocond; - MARK; - // upper 16bits - SLLIW(x3, x4, 16); // x3 = mask - SUBI(wback, wback, 2); // aligning address - SLLI(x1, x1, 16); // x1 = extended Gw - MARK2; - LR_W(x6, wback, 1, 1); // x6 = Ed - AND(x5, x6, x3); // x5 = Ed.h[1] << 16 - ADDW(x5,x5, x1); - ANDI(x9, xFlags, 1 << F_CF); - SLLIW(x9, x9, 16); - ADDW(x5, x5, x9); // x5 = adc << 16 - AND(x9, x6, x4); // x9 = Ed.h[0] - OR(x5, x5, x9); - SC_W(x9, x5, wback, 1, 1); - BNEZ_MARK2(x9); - IFX(X_ALL|X_PEND) { - SRLIW(x6, x6, 16); - SRLIW(x1, x1, 16); - } - MARK3; - IFX(X_ALL|X_PEND) { - emit_adc16(dyn, ninst, x1, x6, x3, x4, x5); - } - } - SMDMB(); - break; + switch (opcode) { case 0x81: case 0x83: nextop = F8; @@ -126,7 +61,7 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { INST_NAME("LOCK ADD Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if(MODREG) { if(opcode==0x81) u64 = F16; else u64 = (uint16_t)(int16_t)F8S; ed = xRAX+(nextop&7)+(rex.b<<3); @@ -155,7 +90,7 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int OR(x4, x4, x3); SC_W(x3, x4, wback, 1, 1); BNEZ_MARKLOCK(x3); - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { SLLIW(x1, x1, 16); SRLIW(x1, x1, 16); } @@ -177,7 +112,7 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MARK3; // final - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_add16(dyn, ninst, x1, x5, x3, x4, x6); } } diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index e9c54901..ef3daed9 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ b/src/dynarec/rv64/dynarec_rv64_67.c 
@@ -64,7 +64,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x01: INST_NAME("ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -73,7 +73,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x02: INST_NAME("ADD Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1)); GETEB32(x2, 0); @@ -83,7 +83,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x03: INST_NAME("ADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -92,14 +92,14 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x05: INST_NAME("ADD EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6); break; case 0x09: INST_NAME("OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -108,7 +108,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0A: INST_NAME("OR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, OR(gb, gb, x1)); GETEB32(x2, 0); @@ -118,7 +118,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0B: INST_NAME("OR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -127,7 +127,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0D: INST_NAME("OR EAX, Id"); - 
SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_or32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; @@ -141,7 +141,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (rep) { case 0: if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); nextop = F8; GETGXSS(s0); if(MODREG) { @@ -228,7 +228,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x11: INST_NAME("ADC Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -239,7 +239,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x13: INST_NAME("ADC Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -249,7 +249,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x15: INST_NAME("ADC EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x1, i64); emit_adc32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); @@ -258,7 +258,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x19: INST_NAME("SBB Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -268,7 +268,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1A: INST_NAME("SBB Gb, Eb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, { SUB(gb, gb, x1); @@ -284,7 +284,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, 
uintptr_t addr, uintptr_t ip, int ni case 0x1B: INST_NAME("SBB Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -294,7 +294,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1D: INST_NAME("SBB EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x2, i64); emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); @@ -302,7 +302,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x21: INST_NAME("AND Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -311,7 +311,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x22: INST_NAME("AND Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB32(x2, 0); GETGB(x1); @@ -320,7 +320,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x23: INST_NAME("AND Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -329,14 +329,14 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x25: INST_NAME("AND EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; case 0x29: INST_NAME("SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -345,7 +345,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2A: INST_NAME("SUB Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, 
NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, SUB(gb, gb, x1)); GETEB32(x2, 0); @@ -355,7 +355,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2B: INST_NAME("SUB Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -364,14 +364,14 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x2D: INST_NAME("SUB EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_sub32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6); break; case 0x31: INST_NAME("XOR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -380,7 +380,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x32: INST_NAME("XOR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; FAST_8BIT_OPERATION(gb, wb, x1, XOR(gb, gb, x1)); GETEB32(x2, 0); @@ -390,7 +390,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x33: INST_NAME("XOR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -399,14 +399,14 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x35: INST_NAME("XOR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; case 0x38: INST_NAME("CMP Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB32(x1, 0); GETGB(x2); @@ -414,7 +414,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x39: INST_NAME("CMP Ed, 
Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -422,7 +422,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3A: INST_NAME("CMP Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB32(x2, 0); GETGB(x1); @@ -430,7 +430,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3B: INST_NAME("CMP Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -438,7 +438,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3C: INST_NAME("CMP AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); if(u8) { @@ -450,7 +450,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3D: INST_NAME("CMP EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; if(i64) { MOV64xw(x2, i64); @@ -513,7 +513,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch((nextop>>3)&7) { case 0: //ADD if(opcode==0x81) {INST_NAME("ADD Ed, Id");} else {INST_NAME("ADD Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); @@ -521,7 +521,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 1: //OR if(opcode==0x81) {INST_NAME("OR Ed, Id");} else {INST_NAME("OR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; 
emit_or32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -530,7 +530,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 2: //ADC if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; MOV64xw(x5, i64); @@ -540,7 +540,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 3: //SBB if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");} READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; MOV64xw(x5, i64); @@ -549,7 +549,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 4: //AND if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_and32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -557,7 +557,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: //SUB if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6); @@ -565,7 +565,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: //XOR if(opcode==0x81) {INST_NAME("XOR Ed, Id");} else {INST_NAME("XOR Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = 
F8S; emit_xor32c(dyn, ninst, rex, ed, i64, x3, x4); @@ -573,7 +573,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: //CMP if(opcode==0x81) {INST_NAME("CMP Ed, Id");} else {INST_NAME("CMP Ed, Ib");} - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED32((opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; if(i64) { @@ -670,7 +670,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED32(1); F8; emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -731,6 +731,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xE0: INST_NAME("LOOPNZ (32bits)"); READFLAGS(X_ZF); + i8 = F8S; SUBI(xRCX, xRCX, 1); ANDI(x1, xFlags, 1 << F_ZF); @@ -771,7 +772,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETED32(0); if (rex.w) { if (ed == xRDX) diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c index 4e10342a..ed14eb40 100644 --- a/src/dynarec/rv64/dynarec_rv64_db.c +++ b/src/dynarec/rv64/dynarec_rv64_db.c @@ -149,7 +149,7 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xEE: case 0xEF: INST_NAME("FUCOMI ST0, STx"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); if (ST_IS_F(0)) { @@ -168,7 +168,7 @@ 
uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xF6: case 0xF7: INST_NAME("FCOMI ST0, STx"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7)); if (ST_IS_F(0)) { diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index 48cf9ac7..f1b19cf4 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -68,7 +68,7 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("FCOMIP ST0, STx"); } - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c index 86b63a91..5cbd6eca 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c @@ -27,11 +27,14 @@ void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, d_xor8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } + XOR(s1, s1, s2); - ANDI(s1, s1, 0xff); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SRLI(s3, s1, 7); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -53,11 +56,13 @@ void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, d_xor8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } - XORI(s1, s1, c&0xff); - ANDI(s1, s1, 0xff); + XORI(s1, s1, c & 0xff); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SRLI(s3, s1, 7); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -79,20 +84,21 @@ void 
emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, rex.w?d_xor64:d_xor32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } XOR(s1, s1, s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // test sign bit before zeroup. IFX(X_SF) { if (!rex.w) SEXT_W(s1, s1); SET_FLAGS_LTZ(s1, F_SF, s3, s4); } - if (!rex.w && s1!=s2) { - ZEROUP(s1); - } + + if (!rex.w && s1 != s2) ZEROUP(s1); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -112,25 +118,25 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, rex.w?d_xor64:d_xor32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if(c>=-2048 && c<=2047) { + if (c >= -2048 && c <= 2047) { XORI(s1, s1, c); } else { MOV64xw(s3, c); XOR(s1, s1, s3); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // test sign bit before zeroup. IFX(X_SF) { if (!rex.w) SEXT_W(s1, s1); SET_FLAGS_LTZ(s1, F_SF, s3, s4); } - if (!rex.w) { - ZEROUP(s1); - } + if (!rex.w) ZEROUP(s1); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -150,16 +156,18 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, d_xor16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } XOR(s1, s1, s2); - ZEXTH(s1, s1); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } + IFX(X_ZF | X_SF) { IFX(X_ZF) { SET_FLAGS_EQZ(s1, F_ZF, s4); @@ -175,16 +183,19 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } // emit OR16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) -void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { +void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, 
int s3, int s4) +{ CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, d_or16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } OR(s1, s1, s2); - ZEXTH(s1, s1); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SD(s1, xEmu, offsetof(x64emu_t, res)); } @@ -208,13 +219,14 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, rex.w?d_or64:d_or32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } OR(s1, s1, s2); - // test sign bit before zeroup. + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { if (!rex.w) SEXT_W(s1, s1); SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -234,24 +246,26 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 emit_pf(dyn, ninst, s1, s3, s4); } } + // emit OR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s4, rex.w?d_or64:d_or32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if(c>=-2048 && c<=2047) { + if (c >= -2048 && c <= 2047) { ORI(s1, s1, c); } else { MOV64xw(s3, c); OR(s1, s1, s3); } - // test sign bit before zeroup. 
+ if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { if (!rex.w) SEXT_W(s1, s1); SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -276,16 +290,17 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in // emit AND8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { - CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, d_and8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } AND(s1, s1, s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -308,11 +323,13 @@ void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, d_and8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } - ANDI(s1, s1, c&0xff); + ANDI(s1, s1, c & 0xff); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SD(s1, xEmu, offsetof(x64emu_t, res)); @@ -329,16 +346,19 @@ void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s } } -void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { +void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, d_tst16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } AND(s1, s1, s2); // res = s1 & s2 + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -360,13 +380,15 @@ void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, rex.w?d_tst64:d_tst32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } AND(s1, s1, s2); // res = s1 & s2 if (!rex.w) ZEROUP(s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, 
xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -388,11 +410,11 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, rex.w?d_tst64:d_tst32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if(c>=-2048 && c<=2047) { + if (c >= -2048 && c <= 2047) { ANDI(s1, s1, c); } else { MOV64xw(s3, c); @@ -400,6 +422,8 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } if (!rex.w && (c > 2047 || c < 0)) ZEROUP(s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -421,12 +445,14 @@ void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) CLEAR_FLAGS(); IFX(X_PEND) { SET_DF(s3, d_or8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } OR(s1, s1, s2); - + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 6528483d..2b2ab7a8 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -29,7 +29,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w?d_add64:d_add32b); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -55,11 +55,15 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } SET_FLAGS_NEZ(s5, F_CF, s4); } - IFX(X_AF | X_OF) { - OR(s3, s1, s2); // s3 = op1 | op2 - AND(s4, s1, s2); // s4 = op1 & op2 + IFX (X_AF | X_OF) { + OR(s3, s1, s2); // s3 = op1 | op2 + AND(s4, s1, s2); // s4 = op1 & op2 } + ADDxw(s1, s1, s2); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, 
offsetof(x64emu_t, res)); } @@ -100,7 +104,6 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5) { - CLEAR_FLAGS(); if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) { // special case when doing math on ESP and only PEND is needed: ignoring it! if (c >= -2048 && c < 2048) { @@ -114,6 +117,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } return; } + CLEAR_FLAGS(); IFX(X_PEND | X_AF | X_CF | X_OF) { MOV64xw(s2, c); } @@ -121,7 +125,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w?d_add64:d_add32b); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -155,10 +159,15 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i if(c >= -2048 && c < 2048) { ADDIxw(s1, s1, c); } else { - IFX(X_PEND | X_AF | X_CF | X_OF) {} else {MOV64xw(s2, c);} + IFX (X_PEND | X_AF | X_CF | X_OF) { + } else { + MOV64xw(s2, c); + } ADDxw(s1, s1, s2); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -204,7 +213,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_add16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -213,6 +222,8 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } ADD(s1, s1, s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); } @@ -264,7 
+275,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SB(s1, xEmu, offsetof(x64emu_t, op1)); SB(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_add8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -273,6 +284,8 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } ADD(s1, s1, s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_AF|X_OF) { if(rv64_zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) @@ -322,7 +335,7 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i SB(s1, xEmu, offsetof(x64emu_t, op1)); SB(s4, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_add8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -332,6 +345,8 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i } ADDI(s1, s1, c); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_AF|X_OF) { if(rv64_zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) @@ -380,7 +395,7 @@ void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SB(s1, xEmu, offsetof(x64emu_t, op1)); SB(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sub8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -391,6 +406,9 @@ void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SUB(s1, s1, s2); ANDI(s1, s1, 0xff); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SRLI(s3, s1, 7); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -422,7 +440,7 @@ void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sub16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -432,6 +450,8 @@ void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } SUBW(s1, s1, 
s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -459,7 +479,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w?d_sub64:d_sub32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -469,6 +489,8 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } SUBxw(s1, s1, s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -491,7 +513,6 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit SUB32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5) { - CLEAR_FLAGS(); if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) { // special case when doing math on RSP and only PEND is needed: ignoring it! 
if (c > -2048 && c <= 2048) { @@ -505,13 +526,13 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } return; } - + CLEAR_FLAGS(); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); MOV64xw(s2, c); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w?d_sub64:d_sub32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -526,6 +547,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX(X_PEND) {} else {MOV64xw(s2, c);} SUBxw(s1, s1, s2); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_AF | X_CF | X_OF) { IFX(X_PEND) {} @@ -554,13 +576,13 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i // emit INC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { - IFX(X_ALL) { + IFXORNAT (X_ALL) { ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); } IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_inc8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -569,6 +591,7 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } ADDIW(s1, s1, 1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -610,13 +633,13 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit DEC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { - IFX(X_ALL) { + IFXORNAT (X_ALL) { ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); } IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_dec8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { 
@@ -626,6 +649,7 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } ADDIW(s1, s1, -1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -661,13 +685,13 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit INC32 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { - IFX(X_ALL) { + IFXORNAT (X_ALL) { ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); } IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, rex.w?d_inc64:d_inc32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -676,6 +700,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } ADDIxw(s1, s1, 1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -717,13 +742,13 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { - IFX(X_ALL) { + IFXORNAT (X_ALL) { ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); } IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, rex.w?d_dec64:d_dec32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -733,6 +758,7 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } ADDIxw(s1, s1, -1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); @@ -769,13 +795,13 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int 
s1, int s2, int s // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { - IFX(X_ALL) { + IFXORNAT (X_ALL) { ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); } IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_inc16); - } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) { + } else IFXORNAT (X_ZF | X_OF | X_AF | X_SF | X_PF) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -784,6 +810,7 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } ADDI(s1, s1, 1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -826,13 +853,13 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit DEC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { - IFX(X_ALL) { + IFXORNAT (X_ALL) { ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF))); } IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_dec16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -843,6 +870,8 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ADDIW(s1, s1, -1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -881,7 +910,7 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SB(s1, xEmu, offsetof(x64emu_t, op1)); SB(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sbb8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -895,6 +924,8 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SUBW(s1, s1, s3); ANDI(s1, s1, 0xff); + if 
(dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + CLEAR_FLAGS(); IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -914,12 +945,13 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } // emit ADC8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch -void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { +void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_adc8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -931,6 +963,8 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i ANDI(s3, xFlags, 1 << F_CF); ADD(s1, s1, s3); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + CLEAR_FLAGS(); IFX(X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); @@ -994,7 +1028,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sbb16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1007,6 +1041,8 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ANDI(s3, xFlags, 1 << F_CF); SUBW(s1, s1, s3); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + CLEAR_FLAGS(); SLLIW(s1, s1, 16); IFX(X_SF) { @@ -1034,7 +1070,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w?d_sbb64:d_sbb32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1047,6 +1083,8 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, xFlags, 1 << F_CF); SUBxw(s1, s1, s3); + if (dyn->insts[ninst].nat_flags_fusion) 
NAT_FLAGS_OPS(s1, xZR); + CLEAR_FLAGS(); IFX(X_SF) { SET_FLAGS_LTZ(s1, F_SF, s4, s5); @@ -1075,7 +1113,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, rex.w?d_neg64:d_neg32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -1083,6 +1121,8 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } NEGxw(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1128,7 +1168,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_neg16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -1137,6 +1177,8 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) NEG(s1, s1); ZEXTH(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1181,7 +1223,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_neg8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -1190,6 +1232,8 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) NEG(s1, s1); ANDI(s1, s1, 0xff); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1233,7 +1277,7 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_adc16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_AF | X_OF) { @@ -1245,6 +1289,8 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int 
s1, int s2, int s3, int s4, ANDI(s3, xFlags, 1 << F_CF); ADD(s1, s1, s3); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + CLEAR_FLAGS(); IFX(X_PEND) { SW(s1, xEmu, offsetof(x64emu_t, res)); @@ -1295,7 +1341,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w?d_adc64:d_adc32b); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -1328,6 +1374,8 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ANDI(s3, xFlags, 1 << F_CF); ADDxw(s1, s1, s3); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + CLEAR_FLAGS(); IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 7bf4a7dd..3c4b26b4 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -33,7 +33,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SB(s3, xEmu, offsetof(x64emu_t, op2)); SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shl8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -51,6 +51,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SET_FLAGS_LTZ(s1, F_SF, s3, s4); } SRLI(s1, s1, 56); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -78,6 +79,8 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } } MV(s1, xZR); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(xZR, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -106,7 +109,7 @@ void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SB(s3, xEmu, offsetof(x64emu_t, op2)); SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr8); - 
} else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -132,6 +135,8 @@ void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); ANDI(s1, s1, 0xff); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // SF should be unset IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -155,7 +160,7 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SB(s3, xEmu, offsetof(x64emu_t, op2)); SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_sar8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -178,6 +183,8 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); ANDI(s1, s1, 0xff); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -198,7 +205,7 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SB(s1, xEmu, offsetof(x64emu_t, op1)); SB(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, d_shl8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -218,6 +225,8 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } SRLI(s1, s1, 56); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -249,7 +258,7 @@ void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SB(s2, xEmu, offsetof(x64emu_t, op2)); SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -272,6 +281,8 @@ void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SRL(s1, s1, s2); ANDI(s1, s1, 0xff); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // SF should be unset IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); @@ -295,7 +306,7 @@ void emit_sar8(dynarec_rv64_t* 
dyn, int ninst, int s1, int s2, int s3, int s4, i SB(s2, xEmu, offsetof(x64emu_t, op2)); SB(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_sar8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -314,6 +325,8 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SRL(s1, s1, s2); ANDI(s1, s1, 0xff); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SB(s1, xEmu, offsetof(x64emu_t, res)); } @@ -337,7 +350,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SH(s3, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shl16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -356,6 +369,8 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } SRLI(s1, s1, 48); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -382,6 +397,8 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } } MV(s1, xZR); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(xZR, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -410,7 +427,7 @@ void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SH(s3, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -434,6 +451,9 @@ void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int } SRLI(s1, s1, c); + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // SF should be unset IFX(X_PEND) { @@ -458,7 +478,7 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SH(s3, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_sar16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { 
SET_DFNONE(); } IFX(X_CF) { @@ -481,6 +501,8 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SRLI(s1, s1, c); ZEXTH(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -502,7 +524,7 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, op1)); SH(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, d_shl16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -522,6 +544,8 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } SRLI(s1, s1, 48); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -553,7 +577,7 @@ void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s2, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -576,6 +600,8 @@ void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SRL(s1, s1, s2); ZEXTH(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // SF should be unset IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); @@ -599,7 +625,7 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s2, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_sar16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -618,6 +644,8 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SRL(s1, s1, s2); ZEXTH(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -638,7 +666,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, 
op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -658,6 +686,8 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SLLW(s1, s1, s2); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -697,7 +727,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(s3, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -715,6 +745,8 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SLLIW(s1, s1, c); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -752,7 +784,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w?d_shr64:d_shr32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -774,6 +806,8 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SRL(s1, s1, s2); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -808,7 +842,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w?d_shr64:d_shr32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -837,6 +871,8 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRLIW(s1, s1, c); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); } @@ -871,7 +907,7 @@ void 
emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w?d_sar64:d_sar32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -893,6 +929,8 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRAIW(s1, s1, c); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + // SRAIW sign-extends, so test sign bit before clearing upper bits IFX(X_SF) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -923,7 +961,7 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w?d_rol64:d_rol32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -941,6 +979,8 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SRLxw(s1, s1, s4); OR(s1, s3, s1); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -975,7 +1015,7 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w?d_ror64:d_ror32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -993,6 +1033,8 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SLLxw(s1, s1, s4); OR(s1, s3, s1); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1033,7 +1075,7 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, MOV32w(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w?d_rol64:d_rol32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if(rv64_zbb) { @@ -1048,6 +1090,8 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, if 
(!rex.w) ZEROUP(s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1082,7 +1126,7 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, MOV32w(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w?d_ror64:d_ror32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if(rv64_zbb) { @@ -1097,6 +1141,8 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, if (!rex.w) ZEROUP(s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1130,7 +1176,7 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w?d_shrd64:d_shrd32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if(!c) { @@ -1158,6 +1204,8 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin SLLIxw(s1, s2, (rex.w?64:32)-c); OR(s1, s1, s3); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + if (!rex.w) { ZEROUP(s1); } @@ -1197,7 +1245,7 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } else SH(xZR, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shrd16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if(!c) { @@ -1234,6 +1282,8 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin OR(s1, s1, s5); } ZEXTH(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1270,7 +1320,7 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w?d_shld64:d_shld32); 
- } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1302,6 +1352,8 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin if (!rex.w) { ZEROUP(s1); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1335,7 +1387,7 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s5, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w ? d_shrd64 : d_shrd32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX(X_CF) { @@ -1354,6 +1406,8 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int SLLxw(s4, s2, s4); OR(s1, s4, s3); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1389,7 +1443,7 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s5, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w?d_shld64:d_shld32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1414,6 +1468,8 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int if (!rex.w) { ZEROUP(s1); } + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_OF) { ADDI(s5, s5, -1); BNEZ_MARK(s5); @@ -1446,7 +1502,7 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin } else SH(xZR, xEmu, offsetof(x64emu_t, op2)); SH(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shld16); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1483,6 +1539,8 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin OR(s1, s1, s3); } ZEXTH(s1, s1); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX(X_PEND) { SH(s1, xEmu, offsetof(x64emu_t, res)); } diff --git 
a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c index 116c602d..4053c15d 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c @@ -38,16 +38,19 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i NOT(s5, s1); } - // It's a cmp, we can't store the result back to s1. - SUB(s6, s1, s2); - ANDI(s6, s6, 0xff); IFX_PENDOR0 { + SUB(s6, s1, s2); SB(s6, xEmu, offsetof(x64emu_t, res)); + } else IFX (X_ALL) { + SUB(s6, s1, s2); } + IFX (X_ALL) ANDI(s6, s6, 0xff); + IFX(X_SF) { SRLI(s3, s6, 7); SET_FLAGS_NEZ(s3, F_SF, s4); } + CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8); IFX(X_ZF) { SET_FLAGS_EQZ(s6, F_ZF, s4); @@ -55,6 +58,20 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX(X_PF) { emit_pf(dyn, ninst, s6, s3, s4); } + + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + SLLI(s3, s1, 56); + SRAI(s3, s3, 56); + SLLI(s4, s2, 56); + SRAI(s4, s4, 56); + NAT_FLAGS_OPS(s3, s4); + } else { + NAT_FLAGS_OPS(s1, s2); + } + } } // emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch @@ -80,6 +97,17 @@ void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + SLLI(s3, s1, 56); + SRAI(s3, s3, 56); + NAT_FLAGS_OPS(s3, xZR); + } else { + NAT_FLAGS_OPS(s1, xZR); + } + } } // emit CMP16 instruction, from cmp s1, s2, using s3 and s4 as scratch @@ -99,18 +127,19 @@ void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, NOT(s5, s1); } - // It's a cmp, we can't store the result back to s1. 
- SUB(s6, s1, s2); - IFX(X_ALL) { - ZEXTH(s6, s6); - } IFX_PENDOR0 { + SUB(s6, s1, s2); SH(s6, xEmu, offsetof(x64emu_t, res)); + } else IFX (X_ALL) { + SUB(s6, s1, s2); } + IFX (X_ALL) ZEXTH(s6, s6); + IFX(X_SF) { SRLI(s3, s6, 15); SET_FLAGS_NEZ(s3, F_SF, s4); } + CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 16); IFX(X_ZF) { SET_FLAGS_EQZ(s6, F_ZF, s4); @@ -118,6 +147,20 @@ void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX(X_PF) { emit_pf(dyn, ninst, s6, s3, s4); } + + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + SLLI(s3, s1, 48); + SRAI(s3, s3, 48); + SLLI(s4, s2, 48); + SRAI(s4, s4, 48); + NAT_FLAGS_OPS(s3, s4); + } else { + NAT_FLAGS_OPS(s1, s2); + } + } } // emit CMP16 instruction, from cmp s1 , #0, using s3 and s4 as scratch @@ -132,7 +175,6 @@ void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) } else { SET_DFNONE(); } - IFX(X_SF) { SRLI(s3, s1, 15); SET_FLAGS_NEZ(s3, F_SF, s4); @@ -143,6 +185,18 @@ void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + SLLI(s3, s1, 48); + SRAI(s3, s3, 48); + NAT_FLAGS_OPS(s3, xZR); + } else { + NAT_FLAGS_OPS(s1, xZR); + } + } } // emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch @@ -162,16 +216,18 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s NOT(s5, s1); } - // It's a cmp, we can't store the result back to s1. 
- SUBxw(s6, s1, s2); IFX_PENDOR0 { + SUBxw(s6, s1, s2); SDxw(s6, xEmu, offsetof(x64emu_t, res)); + } else IFX (X_ALL) { + SUBxw(s6, s1, s2); } + IFX(X_SF) { SET_FLAGS_LTZ(s6, F_SF, s3, s4); } - if (!rex.w) { - ZEROUP(s6); + IFX (X_AF | X_CF | X_OF | X_ZF | X_PF) { + if (!rex.w) ZEROUP(s6); } CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32); IFX(X_ZF) { @@ -180,6 +236,22 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PF) { emit_pf(dyn, ninst, s6, s3, s4); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) { + if (rex.w) + NAT_FLAGS_OPS(s1, s2); + else { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s1); + SEXT_W(s4, s2); + } else { + AND(s3, s1, xMASK); + AND(s4, s2, xMASK); + } + NAT_FLAGS_OPS(s3, s4); + } + } } // emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch @@ -194,6 +266,7 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int } else { SET_DFNONE(); } + IFX(X_SF) { if (rex.w) { SET_FLAGS_LTZ(s1, F_SF, s3, s4); @@ -208,10 +281,25 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) { + if (rex.w) + NAT_FLAGS_OPS(s1, xZR); + else { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s1); + } else { + AND(s3, s1, xMASK); + } + NAT_FLAGS_OPS(s3, xZR); + } + } } // emit TEST8 instruction, from test s1, s2, using s3, s4 and s5 as scratch -void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { +void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ CLEAR_FLAGS(); IFX_PENDOR0 { SET_DF(s3, d_tst8); @@ -224,6 +312,9 @@ void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX_PENDOR0 { SD(s3, xEmu, offsetof(x64emu_t, res)); } + + if 
(dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); + IFX(X_SF) { SRLI(s4, s3, 7); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -234,6 +325,12 @@ void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { + SLLI(s3, s3, 56); + SRAI(s3, s3, 56); + } } // emit TEST16 instruction, from test s1, s2, using s3, s4 and s5 as scratch @@ -251,6 +348,9 @@ void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX_PENDOR0 { SH(s3, xEmu, offsetof(x64emu_t, res)); } + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); + IFX(X_SF) { SRLI(s4, s3, 15); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -261,6 +361,12 @@ void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { + SLLI(s3, s3, 48); + SRAI(s3, s3, 48); + } } // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch @@ -278,9 +384,15 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int IFX_PENDOR0 { SDxw(s3, xEmu, offsetof(x64emu_t, res)); } - IFX(X_SF|X_ZF) { + + IFX (X_SF | X_ZF) { + if (!rex.w) ZEROUP(s3); + } else if (dyn->insts[ninst].nat_flags_fusion) { if (!rex.w) ZEROUP(s3); } + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); + IFX(X_SF) { SRLI(s4, s3, rex.w ? 
63 : 31); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -291,6 +403,14 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { + if (!rex.w) { + SLLI(s3, s3, 32); + SRAI(s3, s3, 32); + } + } } // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch @@ -303,10 +423,10 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, SET_DFNONE(); } - if(c>=-2048 && c<=2047) { + if (c >= -2048 && c <= 2047) { ANDI(s3, s1, c); - IFX(X_SF|X_ZF) { - if (!rex.w && c<0) ZEROUP(s3); + IFX (X_SF | X_ZF) { + if (!rex.w && c < 0) ZEROUP(s3); } } else { MOV64xw(s3, c); @@ -316,6 +436,9 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, IFX_PENDOR0 { SDxw(s3, xEmu, offsetof(x64emu_t, res)); } + + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); + IFX(X_SF) { SRLI(s4, s3, rex.w ? 
63 : 31); SET_FLAGS_NEZ(s4, F_SF, s5); @@ -326,4 +449,12 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { + if (!rex.w) { + SLLI(s3, s3, 32); + SRAI(s3, s3, 32); + } + } } diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index f99d8632..6b54d9ce 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -55,7 +55,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch(opcode) { case 0x01: INST_NAME("LOCK ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -65,7 +65,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); AMOADDxw(x1, gd, wback, 1, 1); - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_add32(dyn, ninst, rex, x1, gd, x3, x4, x5); } } @@ -73,7 +73,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x09: INST_NAME("LOCK OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -83,7 +83,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); AMOORxw(x1, gd, wback, 1, 1); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_or32(dyn, ninst, rex, x1, gd, x3, x4); } SMDMB(); @@ -96,7 +96,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch(rep) { case 0: INST_NAME("LOCK CMPXCHG Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, 
NAT_FLAGS_NOFUSION); nextop = F8; ANDI(x6, xRAX, 0xff); // AL SMDMB(); @@ -127,7 +127,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { ANDI(wback, x2, 0xff); } - GETGB(x1); + GETGB(x5); MV(ed, gd); MARK2; ANDI(xRAX, xRAX, ~0xff); @@ -185,7 +185,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (rep) { case 0: INST_NAME("LOCK CMPXCHG Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; if (MODREG) { @@ -238,7 +238,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch(rep) { case 0: INST_NAME("LOCK XADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -251,7 +251,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); AMOADDxw(x1, gd, wback, 1, 1); - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { MVxw(x2, x1); emit_add32(dyn, ninst, rex, x2, gd, x3, x4, x5); } @@ -275,7 +275,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK CMPXCHG8B Gq, Eq"); } - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); nextop = F8; addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); ANDI(xFlags, xFlags, ~(1<<F_ZF)); @@ -342,7 +342,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x10: INST_NAME("LOCK ADC Eb, Gb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGB(x2); SMDMB(); @@ -358,12 +358,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SRLIW(x1, wback, wb2*8); ANDI(x1, x1, 0xFF); emit_adc8(dyn, ninst, x1, 
x2, x3, x4, x5); - SLLI(x1, x1, wb2*8); + SLLI(x5, x1, wb2 * 8); MOV_U12(x3, 0xFF); SLLI(x3, x3, wb2*8); NOT(x3, x3); AND(wback, wback, x3); - OR(wback, wback, x1); + OR(wback, wback, x5); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x1, x3, &fixedaddress, rex, LOCK_LOCK, 0, 0); ANDI(x3, wback, 0b11); @@ -398,12 +398,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni OR(x5, x5, x6); SC_W(x4, x5, wback, 1, 1); BNEZ_MARK2(x4); - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { SRLI(x2, x2, x3); // Gb SRLI(x4, x9, x3); // Eb } MARK3; - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_adc8(dyn, ninst, x4, x2, x3, x5, x6); } } @@ -412,7 +412,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x11: INST_NAME("LOCK ADC Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -428,7 +428,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDxw(x3, x3, x4); SCxw(x4, x3, wback, 1, 1); BNEZ_MARKLOCK(x4); - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_adc32(dyn, ninst, rex, x1, gd, x3, x4, x5, x6); } } @@ -436,7 +436,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x21: INST_NAME("LOCK AND Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -446,14 +446,14 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); AMOANDxw(x1, gd, wback, 1, 1); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_and32(dyn, ninst, rex, x1, gd, x3, x4); } SMDMB(); break; case 0x29: INST_NAME("LOCK SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; 
SMDMB(); @@ -464,7 +464,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); SUBxw(x4, xZR, gd); AMOADDxw(x1, x4, wback, 1, 1); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_sub32(dyn, ninst, rex, x1, gd, x3, x4, x5); } SMDMB(); @@ -478,7 +478,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch((nextop>>3)&7) { case 1: // OR INST_NAME("LOCK OR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if(MODREG) { GETEB(x1, 1); u8 = F8; @@ -493,7 +493,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(x1, xZR, u8); SLL(x1, x1, x2); // Ib << offset AMOORxw(x4, x1, x3, 1, 1); - IFX(X_ALL|X_PEND) { + IFXORNAT (X_ALL | X_PEND) { SRL(x1, x4, x2); ANDI(x1, x1, 0xFF); emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -516,7 +516,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK ADD Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if(MODREG) { if(opcode==0x81) i64 = F32S; else i64 = F8S; ed = xRAX+(nextop&7)+(rex.b<<3); @@ -545,7 +545,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNEZ_MARK2(x3); SDxw(x4, wback, 0); MARK; - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); SMDMB(); } @@ -556,7 +556,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK OR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if(MODREG) { if(opcode==0x81) i64 = F32S; else i64 = F8S; ed = xRAX+(nextop&7)+(rex.b<<3); @@ -569,7 +569,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F8S; MOV64xw(x4, i64); 
AMOORxw(x1, x4, wback, 1, 1); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_or32c(dyn, ninst, rex, x1, i64, x3, x4); } break; @@ -579,7 +579,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK AND Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if(MODREG) { if(opcode==0x81) i64 = F32S; else i64 = F8S; ed = xRAX+(nextop&7)+(rex.b<<3); @@ -592,7 +592,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F8S; MOV64xw(x9, i64); AMOANDxw(x1, x9, wback, 1, 1); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_and32c(dyn, ninst, rex, x1, i64, x3, x4); } break; @@ -602,7 +602,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK SUB Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if(MODREG) { if(opcode==0x81) i64 = F32S; else i64 = F8S; ed = xRAX+(nextop&7)+(rex.b<<3); @@ -631,7 +631,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BNEZ_MARK2(x3); SDxw(x4, wback, 0); MARK; - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); SMDMB(); } @@ -642,7 +642,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK XOR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if (MODREG) { if (opcode == 0x81) i64 = F32S; @@ -658,7 +658,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i64 = F8S; MOV64xw(x9, i64); AMOXORxw(x1, x9, wback, 1, 1); - IFX(X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4); } break; @@ -729,7 +729,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni { case 0: // INC Ed INST_NAME("LOCK INC Ed"); - 
SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); @@ -741,13 +741,13 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDIxw(x4, x1, 1); SCxw(x3, x4, wback, 1, 1); BNEZ_MARKLOCK(x3); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_inc32(dyn, ninst, rex, x1, x3, x4, x5, x6); } break; case 1: // DEC Ed INST_NAME("LOCK DEC Ed"); - SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); SMDMB(); if(MODREG) { ed = xRAX+(nextop&7)+(rex.b<<3); @@ -759,7 +759,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDIxw(x4, x1, -1); SCxw(x3, x4, wback, 1, 1); BNEZ_MARKLOCK(x3); - IFX(X_ALL|X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_dec32(dyn, ninst, rex, x1, x3, x4, x5, x6); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index de0e21e5..42a67929 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -431,7 +431,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xB8: INST_NAME("POPCNT Gd, Ed"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); @@ -473,7 +473,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xBC: INST_NAME("TZCNT Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); @@ -494,7 +494,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xBD: INST_NAME("LZCNT Gd, Ed"); - SETFLAGS(X_ZF | X_CF, SF_SUBSET); + SETFLAGS(X_ZF | X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c 
b/src/dynarec/rv64/dynarec_rv64_functions.c index 2f5839c0..863607b5 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -637,7 +637,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r }; if(box64_dynarec_dump) { printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name); - dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d(%d/%d), sew@entry=%d, sew@exit=%d", + dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, fuse=%d, sm=%d(%d/%d), sew@entry=%d, sew@exit=%d", (box64_dynarec_dump > 1) ? "\e[32m" : "", (void*)(dyn->native_start + dyn->insts[ninst].address), dyn->insts[ninst].size / 4, @@ -652,6 +652,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dyn->insts[ninst].x64.use_flags, dyn->insts[ninst].x64.need_before, dyn->insts[ninst].x64.need_after, + dyn->insts[ninst].nat_flags_fusion, dyn->smwrite, dyn->insts[ninst].will_write, dyn->insts[ninst].last_write, dyn->insts[ninst].vector_sew_entry, dyn->insts[ninst].vector_sew_exit); if(dyn->insts[ninst].pred_sz) { @@ -764,3 +765,21 @@ int fpu_is_st_freed(dynarec_rv64_t* dyn, int ninst, int st) { return (dyn->e.tags&(0b11<<(st*2)))?1:0; } + +void updateNativeFlags(dynarec_rv64_t* dyn) +{ + if (!box64_dynarec_nativeflags) + return; + for (int i = 1; i < dyn->size; ++i) + if (dyn->insts[i].nat_flags_fusion) { + if (dyn->insts[i].pred_sz == 1 && dyn->insts[i].pred[0] == i - 1 + && (dyn->insts[i].x64.use_flags & dyn->insts[i - 1].x64.set_flags) == dyn->insts[i].x64.use_flags) { + dyn->insts[i - 1].nat_flags_fusion = 1; + if (dyn->insts[i].x64.use_flags & X_SF) { + dyn->insts[i - 1].nat_flags_needsign = 1; + } + dyn->insts[i].x64.use_flags = 0; + } else + dyn->insts[i].nat_flags_fusion = 0; + } +} diff --git 
a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index fa618381..244aca9d 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -66,4 +66,6 @@ void fpu_reset_ninst(dynarec_native_t* dyn, int ninst); // is st freed int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st); + +void updateNativeFlags(dynarec_rv64_t* dyn); #endif //__DYNAREC_RV64_FUNCTIONS_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index a8d14669..18ff8680 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -2213,6 +2213,24 @@ void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in fpu_reset_reg(dyn); } +int fpu_needpurgecache(dynarec_rv64_t* dyn, int ninst) +{ + // x87 + for (int i = 0; i < 8; ++i) + if (dyn->e.x87cache[i] != -1) + return 1; + // mmx + if (dyn->e.mmxcount) return 1; + // sse + for (int i = 0; i < 16; ++i) + if (dyn->e.ssecache[i].v != -1) return 1; + // avx + if (dyn->ymm_zero) + for (int i = 0; i < 16; ++i) + if (is_avx_zero(dyn, ninst, i)) return 1; + return 0; +} + static int findCacheSlot(dynarec_rv64_t* dyn, int ninst, int t, int n, extcache_t* cache) { ext_cache_t f; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 33143127..c896f284 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -798,7 +798,8 @@ BNEZ(reg, j64); #define IFX(A) if ((dyn->insts[ninst].x64.gen_flags & (A))) -#define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags)) +#define IFXORNAT(A) if ((dyn->insts[ninst].x64.gen_flags & (A)) || dyn->insts[ninst].nat_flags_fusion) +#define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || (!dyn->insts[ninst].x64.gen_flags && !dyn->insts[ninst].nat_flags_fusion))) #define IFXX(A) if 
((dyn->insts[ninst].x64.gen_flags == (A))) #define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B)))) #define IFXN(A, B) if ((dyn->insts[ninst].x64.gen_flags & (A) && !(dyn->insts[ninst].x64.gen_flags & (B)))) @@ -1012,17 +1013,36 @@ } #endif +#ifndef READFLAGS_FUSION +#define READFLAGS_FUSION(A, checkbarrier) READFLAGS(A) +#endif + +#define NAT_FLAGS_OPS(op1, op2) \ + do { \ + dyn->insts[ninst + 1].nat_flags_op1 = op1; \ + dyn->insts[ninst + 1].nat_flags_op2 = op2; \ + } while (0) + +#define NAT_FLAGS_ENABLE_CARRY() dyn->insts[ninst].nat_flags_carry = 1 +#define NAT_FLAGS_ENABLE_SIGN() dyn->insts[ninst].nat_flags_sign = 1 + #ifndef SETFLAGS -#define SETFLAGS(A, B) \ +#define SETFLAGS(A, B, FUSION) \ if (dyn->f.pending != SF_SET \ - && ((B)&SF_SUB) \ + && ((B) & SF_SUB) \ && (dyn->insts[ninst].x64.gen_flags & (~(A)))) \ READFLAGS(((dyn->insts[ninst].x64.gen_flags & X_PEND) ? X_ALL : dyn->insts[ninst].x64.gen_flags) & (~(A))); \ if (dyn->insts[ninst].x64.gen_flags) switch (B) { \ case SF_SUBSET: \ case SF_SET: dyn->f.pending = SF_SET; break; \ - case SF_SET_DF: dyn->f.pending = SF_SET; dyn->f.dfnone = 1; break; \ - case SF_SET_NODF: dyn->f.pending = SF_SET; dyn->f.dfnone = 0; break; \ + case SF_SET_DF: \ + dyn->f.pending = SF_SET; \ + dyn->f.dfnone = 1; \ + break; \ + case SF_SET_NODF: \ + dyn->f.pending = SF_SET; \ + dyn->f.dfnone = 0; \ + break; \ case SF_PENDING: dyn->f.pending = SF_PENDING; break; \ case SF_SUBSET_PENDING: \ case SF_SET_PENDING: \ @@ -1030,7 +1050,8 @@ break; \ } \ else \ - dyn->f.pending = SF_SET + dyn->f.pending = SF_SET; \ + dyn->insts[ninst].nat_flags_nofusion = (FUSION) #endif #ifndef JUMP #define JUMP(A, C) @@ -1331,6 +1352,7 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define fpu_reset_cache STEPNAME(fpu_reset_cache) #define fpu_propagate_stack STEPNAME(fpu_propagate_stack) #define fpu_purgecache STEPNAME(fpu_purgecache) +#define 
fpu_needpurgecache STEPNAME(fpu_needpurgecache) #define mmx_purgecache STEPNAME(mmx_purgecache) #define x87_purgecache STEPNAME(x87_purgecache) #define sse_purgecache STEPNAME(sse_purgecache) @@ -1579,6 +1601,8 @@ void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n); void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst); // purge the FPU cache (needs 3 scratch registers) void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3); +// check if the fpu cache need to be purged +int fpu_needpurgecache(dynarec_rv64_t* dyn, int ninst); // purge MMX cache void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1); // purge x87 cache @@ -1647,86 +1671,96 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #endif // GOCOND will use x1 and x3 -#define GOCOND(B, T1, T2) \ - case B + 0x0: \ - INST_NAME(T1 "O " T2); \ - GO(ANDI(x1, xFlags, 1 << F_OF2), EQZ, NEZ, X_OF) \ - break; \ - case B + 0x1: \ - INST_NAME(T1 "NO " T2); \ - GO(ANDI(x1, xFlags, 1 << F_OF2), NEZ, EQZ, X_OF) \ - break; \ - case B + 0x2: \ - INST_NAME(T1 "C " T2); \ - GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, X_CF) \ - break; \ - case B + 0x3: \ - INST_NAME(T1 "NC " T2); \ - GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, X_CF) \ - break; \ - case B + 0x4: \ - INST_NAME(T1 "Z " T2); \ - GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, X_ZF) \ - break; \ - case B + 0x5: \ - INST_NAME(T1 "NZ " T2); \ - GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, X_ZF) \ - break; \ - case B + 0x6: \ - INST_NAME(T1 "BE " T2); \ - GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), EQZ, NEZ, X_CF | X_ZF) \ - break; \ - case B + 0x7: \ - INST_NAME(T1 "NBE " T2); \ - GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, X_CF | X_ZF) \ - break; \ - case B + 0x8: \ - INST_NAME(T1 "S " T2); \ - GO(ANDI(x1, xFlags, 1 << F_SF), EQZ, NEZ, X_SF) \ - break; \ - case B + 0x9: \ - INST_NAME(T1 "NS " T2); \ - GO(ANDI(x1, xFlags, 1 << F_SF), NEZ, EQZ, X_SF) \ - break; \ - case 
B + 0xA: \ - INST_NAME(T1 "P " T2); \ - GO(ANDI(x1, xFlags, 1 << F_PF), EQZ, NEZ, X_PF) \ - break; \ - case B + 0xB: \ - INST_NAME(T1 "NP " T2); \ - GO(ANDI(x1, xFlags, 1 << F_PF), NEZ, EQZ, X_PF) \ - break; \ - case B + 0xC: \ - INST_NAME(T1 "L " T2); \ - GO(SRLI(x1, xFlags, F_SF - F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF2), EQZ, NEZ, X_SF | X_OF) \ - break; \ - case B + 0xD: \ - INST_NAME(T1 "GE " T2); \ - GO(SRLI(x1, xFlags, F_SF - F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF2), NEZ, EQZ, X_SF | X_OF) \ - break; \ - case B + 0xE: \ - INST_NAME(T1 "LE " T2); \ - GO(SRLI(x1, xFlags, F_SF - F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF2); \ - ANDI(x3, xFlags, 1 << F_ZF); \ - OR(x1, x1, x3); \ - ANDI(x1, x1, (1 << F_OF2) | (1 << F_ZF)), EQZ, NEZ, X_SF | X_OF | X_ZF) \ - break; \ - case B + 0xF: \ - INST_NAME(T1 "G " T2); \ - GO(SRLI(x1, xFlags, F_SF - F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_OF2); \ - ANDI(x3, xFlags, 1 << F_ZF); \ - OR(x1, x1, x3); \ - ANDI(x1, x1, (1 << F_OF2) | (1 << F_ZF)), NEZ, EQZ, X_SF | X_OF | X_ZF) \ +#define GOCOND(B, T1, T2) \ + case B + 0x0: \ + INST_NAME(T1 "O " T2); \ + GO(ANDI(x1, xFlags, 1 << F_OF2), EQZ, NEZ, _, _, X_OF) \ + break; \ + case B + 0x1: \ + INST_NAME(T1 "NO " T2); \ + GO(ANDI(x1, xFlags, 1 << F_OF2), NEZ, EQZ, _, _, X_OF) \ + break; \ + case B + 0x2: \ + INST_NAME(T1 "C " T2); \ + GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, GEU, LTU, X_CF) \ + break; \ + case B + 0x3: \ + INST_NAME(T1 "NC " T2); \ + GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, LTU, GEU, X_CF) \ + break; \ + case B + 0x4: \ + INST_NAME(T1 "Z " T2); \ + GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, NE, EQ, X_ZF) \ + break; \ + case B + 0x5: \ + INST_NAME(T1 "NZ " T2); \ + GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, EQ, NE, X_ZF) \ + break; \ + case B + 0x6: \ + INST_NAME(T1 "BE " T2); \ + GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), EQZ, NEZ, GTU, LEU, X_CF | X_ZF) \ + break; \ + case B + 0x7: \ + 
INST_NAME(T1 "NBE " T2); \ + GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, LEU, GTU, X_CF | X_ZF) \ + break; \ + case B + 0x8: \ + INST_NAME(T1 "S " T2); \ + GO(ANDI(x1, xFlags, 1 << F_SF), EQZ, NEZ, _, _, X_SF) \ + break; \ + case B + 0x9: \ + INST_NAME(T1 "NS " T2); \ + GO(ANDI(x1, xFlags, 1 << F_SF), NEZ, EQZ, _, _, X_SF) \ + break; \ + case B + 0xA: \ + INST_NAME(T1 "P " T2); \ + GO(ANDI(x1, xFlags, 1 << F_PF), EQZ, NEZ, _, _, X_PF) \ + break; \ + case B + 0xB: \ + INST_NAME(T1 "NP " T2); \ + GO(ANDI(x1, xFlags, 1 << F_PF), NEZ, EQZ, _, _, X_PF) \ + break; \ + case B + 0xC: \ + INST_NAME(T1 "L " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2), EQZ, NEZ, GE, LT, X_SF | X_OF) \ + break; \ + case B + 0xD: \ + INST_NAME(T1 "GE " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2), NEZ, EQZ, LT, GE, X_SF | X_OF) \ + break; \ + case B + 0xE: \ + INST_NAME(T1 "LE " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2); \ + ANDI(x3, xFlags, 1 << F_ZF); \ + OR(x1, x1, x3); \ + ANDI(x1, x1, (1 << F_OF2) | (1 << F_ZF)), EQZ, NEZ, GT, LE, X_SF | X_OF | X_ZF) \ + break; \ + case B + 0xF: \ + INST_NAME(T1 "G " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2); \ + ANDI(x3, xFlags, 1 << F_ZF); \ + OR(x1, x1, x3); \ + ANDI(x1, x1, (1 << F_OF2) | (1 << F_ZF)), NEZ, EQZ, LE, GT, X_SF | X_OF | X_ZF) \ break +// Dummy macros +#define B__safe(a, b, c) XOR(xZR, xZR, xZR) +#define B_(a, b, c) XOR(xZR, xZR, xZR) + +#define NATIVEJUMP_safe(COND, val) \ + B##COND##_safe(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val); + +#define NATIVEJUMP(COND, val) \ + B##COND(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val); + #define NOTEST(s1) \ if (box64_dynarec_test) { \ SW(xZR, xEmu, offsetof(x64emu_t, test.test)); \ @@ -1823,54 +1857,54 @@ uintptr_t 
dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, BLT(reg, s, 4 + 4); \ ADDIW(reg, s, -1); -#define FAST_8BIT_OPERATION(dst, src, s1, OP) \ - if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ - if (rex.rex) { \ - wb = xRAX + (nextop & 7) + (rex.b << 3); \ - wb2 = 0; \ - gb = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ - gb2 = 0; \ - } else { \ - wb = (nextop & 7); \ - wb2 = (wb >> 2) * 8; \ - wb = xRAX + (wb & 3); \ - gd = (nextop & 0x38) >> 3; \ - gb2 = ((gd & 4) >> 2) * 8; \ - gb = xRAX + (gd & 3); \ - } \ - if (src##2) { ANDI(s1, src, 0xf00); } \ - SLLI(s1, (src##2 ? s1 : src), 64 - src##2 - 8); \ - if (rv64_zbb) { \ - RORI(dst, dst, 8 + dst##2); \ - } else { \ - TH_SRRI(dst, dst, 8 + dst##2); \ - } \ - OP; \ - if (rv64_zbb) { \ - RORI(dst, dst, 64 - 8 - dst##2); \ - } else { \ - TH_SRRI(dst, dst, 64 - 8 - dst##2); \ - } \ - break; \ +#define FAST_8BIT_OPERATION(dst, src, s1, OP) \ + if (!box64_dynarec_nativeflags && MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + if (rex.rex) { \ + wb = xRAX + (nextop & 7) + (rex.b << 3); \ + wb2 = 0; \ + gb = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + gb2 = 0; \ + } else { \ + wb = (nextop & 7); \ + wb2 = (wb >> 2) * 8; \ + wb = xRAX + (wb & 3); \ + gd = (nextop & 0x38) >> 3; \ + gb2 = ((gd & 4) >> 2) * 8; \ + gb = xRAX + (gd & 3); \ + } \ + if (src##2) { ANDI(s1, src, 0xf00); } \ + SLLI(s1, (src##2 ? 
s1 : src), 64 - src##2 - 8); \ + if (rv64_zbb) { \ + RORI(dst, dst, 8 + dst##2); \ + } else { \ + TH_SRRI(dst, dst, 8 + dst##2); \ + } \ + OP; \ + if (rv64_zbb) { \ + RORI(dst, dst, 64 - 8 - dst##2); \ + } else { \ + TH_SRRI(dst, dst, 64 - 8 - dst##2); \ + } \ + break; \ } -#define FAST_16BIT_OPERATION(dst, src, s1, OP) \ - if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ - gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ - ed = xRAX + (nextop & 7) + (rex.b << 3); \ - SLLI(s1, src, 64 - 16); \ - if (rv64_zbb) { \ - RORI(dst, dst, 16); \ - } else { \ - TH_SRRI(dst, dst, 16); \ - } \ - OP; \ - if (rv64_zbb) { \ - RORI(dst, dst, 64 - 16); \ - } else { \ - TH_SRRI(dst, dst, 64 - 16); \ - } \ - break; \ +#define FAST_16BIT_OPERATION(dst, src, s1, OP) \ + if (!box64_dynarec_nativeflags && MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + SLLI(s1, src, 64 - 16); \ + if (rv64_zbb) { \ + RORI(dst, dst, 16); \ + } else { \ + TH_SRRI(dst, dst, 16); \ + } \ + OP; \ + if (rv64_zbb) { \ + RORI(dst, dst, 64 - 16); \ + } else { \ + TH_SRRI(dst, dst, 64 - 16); \ + } \ + break; \ } #define VECTOR_SPLAT_IMM(vreg, imm, s1) \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index c2b7a27b..53e7054f 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -10,11 +10,26 @@ #define READFLAGS(A) \ dyn->insts[ninst].x64.use_flags = A; dyn->f.dfnone = 1;\ dyn->f.pending=SF_SET -#define SETFLAGS(A,B) \ - dyn->insts[ninst].x64.set_flags = A; \ - dyn->insts[ninst].x64.state_flags = (B)&~SF_DF; \ - dyn->f.pending=(B)&SF_SET_PENDING; \ - dyn->f.dfnone=((B)&SF_SET)?(((B)==SF_SET_NODF)?0:1):0; + +#define READFLAGS_FUSION(A, checkbarrier) \ + if (box64_dynarec_nativeflags && ninst > 0 && !dyn->insts[ninst - 1].nat_flags_nofusion) { \ + if ((A) == 
(X_ZF)) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + else if (dyn->insts[ninst - 1].nat_flags_carry && ((A) == (X_CF) || (A) == (X_CF | X_ZF))) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + else if (dyn->insts[ninst - 1].nat_flags_sign && ((A) == (X_SF | X_OF) || (A) == (X_SF | X_OF | X_ZF))) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + if (checkbarrier && fpu_needpurgecache(dyn, ninst)) dyn->insts[ninst].nat_flags_fusion = 0; \ + } \ + READFLAGS(A); + +#define SETFLAGS(A, B, FUSION) \ + dyn->insts[ninst].x64.set_flags = A; \ + dyn->insts[ninst].x64.state_flags = (B) & ~SF_DF; \ + dyn->f.pending = (B) & SF_SET_PENDING; \ + dyn->f.dfnone = ((B) & SF_SET) ? (((B) == SF_SET_NODF) ? 0 : 1) : 0; \ + dyn->insts[ninst].nat_flags_nofusion = (FUSION) + #define EMIT(A) dyn->native_size+=4 #define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C; dyn->insts[ninst].x64.jmp_insts = 0 #define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1 diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 4600dfc7..126ce630 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -126,6 +126,13 @@ typedef struct instruction_rv64_s { uint8_t last_write; uint8_t lock; uint8_t df_notneeded; + uint8_t nat_flags_fusion:1; + uint8_t nat_flags_nofusion:1; + uint8_t nat_flags_carry:1; + uint8_t nat_flags_sign:1; + uint8_t nat_flags_needsign:1; + uint8_t nat_flags_op1; + uint8_t nat_flags_op2; flagcache_t f_exit; // flags status at end of instruction extcache_t e; // extcache at end of instruction (but before poping) flagcache_t f_entry; // flags status before the instruction begin diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index fab9aad5..d146c364 100644 --- 
a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -265,7 +265,11 @@ f28–31 ft8–11 FP temporaries Caller #define BLTU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b110, 0b1100011)) #define BGEU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b111, 0b1100011)) -// TODO: Find a better way to have conditionnal jumps? Imm is a relative jump address, so the the 2nd jump needs to be adapted +#define BGT(rs1, rs2, imm13) BLT(rs2, rs1, imm13) +#define BLE(rs1, rs2, imm13) BGE(rs2, rs1, imm13) +#define BGTU(rs1, rs2, imm13) BLTU(rs2, rs1, imm13) +#define BLEU(rs1, rs2, imm13) BGEU(rs2, rs1, imm13) + #define BEQ_safe(rs1, rs2, imm) \ if ((imm) > -0x1000 && (imm) < 0x1000) { \ BEQ(rs1, rs2, imm); \ @@ -287,7 +291,7 @@ f28–31 ft8–11 FP temporaries Caller BLT(rs1, rs2, imm); \ NOP(); \ } else { \ - BGE(rs2, rs1, 8); \ + BGE(rs1, rs2, 8); \ B(imm - 4); \ } #define BGE_safe(rs1, rs2, imm) \ @@ -295,7 +299,7 @@ f28–31 ft8–11 FP temporaries Caller BGE(rs1, rs2, imm); \ NOP(); \ } else { \ - BLT(rs2, rs1, 8); \ + BLT(rs1, rs2, 8); \ B(imm - 4); \ } #define BLTU_safe(rs1, rs2, imm) \ @@ -303,7 +307,7 @@ f28–31 ft8–11 FP temporaries Caller BLTU(rs1, rs2, imm); \ NOP(); \ } else { \ - BGEU(rs2, rs1, 8); \ + BGEU(rs1, rs2, 8); \ B(imm - 4); \ } #define BGEU_safe(rs1, rs2, imm) \ @@ -311,7 +315,39 @@ f28–31 ft8–11 FP temporaries Caller BGEU(rs1, rs2, imm); \ NOP(); \ } else { \ - BLTU(rs2, rs1, 8); \ + BLTU(rs1, rs2, 8); \ + B(imm - 4); \ + } +#define BGT_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BGT(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BLE(rs1, rs2, 8); \ + B(imm - 4); \ + } +#define BLE_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BLE(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BGT(rs1, rs2, 8); \ + B(imm - 4); \ + } +#define BGTU_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BGTU(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BLEU(rs1, rs2, 8); \ + B(imm - 4); \ + } +#define 
BLEU_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BLEU(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BGTU(rs1, rs2, 8); \ B(imm - 4); \ } diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index 1bb9695e..2881539c 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -766,6 +766,9 @@ void internal_ApplyParams(const char* name, const my_params_t* param) { box64_dynarec_x87double = 1; box64_dynarec_div0 = 1; box64_dynarec_callret = 0; + #ifdef RV64 + box64_dynarec_nativeflags = 0; + #endif } #endif if(box64_log==3) { |