diff options
Diffstat (limited to 'src')
25 files changed, 1177 insertions, 741 deletions
diff --git a/src/core.c b/src/core.c index c97a57db..aa9def4e 100644 --- a/src/core.c +++ b/src/core.c @@ -525,7 +525,7 @@ HWCAP2_AFP return; } - if (la64_lbt = (((cpucfg2 >> 18) & 0b1) && box64_dynarec_nativeflags)) + if (la64_lbt = ((cpucfg2 >> 18) & 0b1)) printf_log(LOG_INFO, " LBT_X86"); if ((la64_lam_bh = (cpucfg2 >> 27) & 0b1)) printf_log(LOG_INFO, " LAM_BH"); @@ -987,7 +987,7 @@ void LoadLogEnv() box64_dynarec_x87double = 1; box64_dynarec_div0 = 1; box64_dynarec_callret = 0; - #ifdef RV64 + #if defined( RV64) || defined(LA64) box64_dynarec_nativeflags = 0; #endif printf_log(LOG_INFO, "Dynarec will compare it's execution with the interpreter (super slow, only for testing)\n"); diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index dc34edd7..3dd07d5f 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -44,7 +44,7 @@ #define RAZ_SPECIFIC(A, N) #define UPDATE_SPECIFICS(A) -#define PREUPDATE_SPECIFICS(A) +#define PREUPDATE_SPECIFICS(A) updateNativeFlags(A) #elif defined(RV64) #define instruction_native_t instruction_rv64_t diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index d6a7f606..50d5e231 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -179,7 +179,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int #if STEP > 0 if(dyn->insts[ninst].x64.has_next && dyn->insts[next].x64.barrier) { if(dyn->insts[next].x64.barrier&BARRIER_FLOAT) { - #ifdef RV64 + #if defined (RV64) || defined(LA64) uint8_t tmp1, tmp2, tmp3; if(dyn->insts[next].nat_flags_fusion) get_free_scratch(dyn, next, &tmp1, &tmp2, &tmp3, x1, x2, x3, x4, x5); else { tmp1=x1; tmp2=x2; tmp3=x3; } diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index e428ee3f..a93d715f 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -31,7 +31,7 @@ int isRetX87Wrapper(wrapper_t fun); uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) { uint8_t nextop, opcode; - uint8_t gd, ed; + uint8_t gd, ed, tmp1, tmp2, tmp3; int8_t i8; int32_t i32, tmp; int64_t i64, j64; @@ -56,7 +56,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (opcode) { case 0x00: INST_NAME("ADD Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -65,7 +65,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x01: INST_NAME("ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -74,7 +74,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x02: INST_NAME("ADD Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -83,7 +83,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x03: INST_NAME("ADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -91,7 +91,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x04: INST_NAME("ADD AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_add8c(dyn, ninst, x1, u8, x3, x4, x5); @@ -99,13 +99,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x05: INST_NAME("ADD EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6); break; case 0x08: INST_NAME("OR Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -114,7 +114,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x09: INST_NAME("OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -123,7 +123,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0A: INST_NAME("OR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -132,7 +132,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0B: INST_NAME("OR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -140,7 +140,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0C: INST_NAME("OR AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_or8c(dyn, ninst, x1, u8, x3, x4, x5); @@ -148,7 +148,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0D: INST_NAME("OR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_or32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; @@ -170,7 +170,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x10: INST_NAME("ADC Eb, Gb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -180,7 +180,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x11: INST_NAME("ADC Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -190,7 +190,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x12: INST_NAME("ADC Gb, Eb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x2, 0); GETGB(x1); @@ -200,7 +200,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x13: INST_NAME("ADC Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -209,7 +209,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x14: INST_NAME("ADC AL, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_adc8c(dyn, ninst, x1, u8, x3, x4, x5, x6); @@ -218,7 +218,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x15: INST_NAME("ADC EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x1, i64); emit_adc32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); @@ -226,7 +226,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x18: INST_NAME("SBB Eb, Gb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -236,7 +236,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x19: INST_NAME("SBB Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -246,7 +246,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1A: INST_NAME("SBB Gb, Eb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x2, 0); GETGB(x1); @@ -256,7 +256,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1B: INST_NAME("SBB Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -265,7 +265,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1C: INST_NAME("SBB AL, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_sbb8c(dyn, ninst, x1, u8, x3, x4, x5, x6); @@ -274,14 +274,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1D: INST_NAME("SBB EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x2, i64); emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); break; case 0x20: INST_NAME("AND Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -290,7 +290,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x21: INST_NAME("AND Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -299,7 +299,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x22: INST_NAME("AND Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -308,7 +308,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x23: INST_NAME("AND Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -316,7 +316,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x24: INST_NAME("AND AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_and8c(dyn, ninst, x1, u8, x3, x4); @@ -324,13 +324,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x25: INST_NAME("AND EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; case 0x28: INST_NAME("SUB Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -339,7 +339,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x29: INST_NAME("SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -348,7 +348,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2A: INST_NAME("SUB Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -357,7 +357,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2B: INST_NAME("SUB Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -365,7 +365,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2C: INST_NAME("SUB AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_sub8c(dyn, ninst, x1, u8, x2, x3, x4, x5); @@ -373,13 +373,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2D: INST_NAME("SUB EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5); break; case 0x30: INST_NAME("XOR Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -388,7 +388,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x31: INST_NAME("XOR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -399,7 +399,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x32: INST_NAME("XOR Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -408,7 +408,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x33: INST_NAME("XOR Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -416,7 +416,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x34: INST_NAME("XOR AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); emit_xor8c(dyn, ninst, x1, u8, x3, x4); @@ -424,13 +424,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x35: INST_NAME("XOR EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4); break; case 0x38: INST_NAME("CMP Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -438,7 +438,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x39: INST_NAME("CMP Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -446,7 +446,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3B: INST_NAME("CMP Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -454,7 +454,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3A: INST_NAME("CMP Gb, Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -462,7 +462,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3C: INST_NAME("CMP AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u8 = F8; ANDI(x1, xRAX, 0xff); if (u8) { @@ -474,7 +474,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3D: INST_NAME("CMP EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; if (i64) { MOV64xw(x2, i64); @@ -491,7 +491,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x46: case 0x47: INST_NAME("INC Reg (32bits)"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); gd = TO_NAT(opcode & 7); emit_inc32(dyn, ninst, rex, gd, x1, x2, x3, x4); break; @@ -504,7 +504,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x4E: case 0x4F: INST_NAME("DEC Reg (32bits)"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); gd = TO_NAT(opcode & 7); emit_dec32(dyn, ninst, rex, gd, x1, x2, x3, x4); break; @@ -590,7 +590,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x69: INST_NAME("IMUL Gd, Ed, Id"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; GETED(4); @@ -630,7 +630,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; GETED(1); @@ -665,7 +665,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x6C: case 0x6D: INST_NAME(opcode == 0x6C ? "INSB" : "INSD"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_priv, -1); @@ -677,7 +677,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x6E: case 0x6F: INST_NAME(opcode == 0x6C ? "OUTSB" : "OUTSD"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_priv, -1); @@ -687,51 +687,61 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni *ok = 0; break; - #define GO(GETFLAGS, NO, YES, F, I) \ - READFLAGS(F); \ - i8 = F8S; \ - BARRIER(BARRIER_MAYBE); \ - JUMP(addr + i8, 1); \ - if (la64_lbt) { \ - X64_SETJ(x1, I); \ - } else { \ - GETFLAGS; \ - } \ - if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ - /* out of block */ \ - i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ - if (la64_lbt) \ - BEQZ_safe(x1, i32); \ - else \ - B##NO##_safe(x1, i32); \ - if (dyn->insts[ninst].x64.jmp_insts == -1) { \ - if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - if (la64_lbt) \ - BNEZ_safe(x1, i32); \ - else \ - B##YES##_safe(x1, i32); \ - } +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F, I) \ + READFLAGS_FUSION(F, x1, x2, x3, x4, x5); \ + i8 = F8S; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(addr + i8, 1); \ + if (!dyn->insts[ninst].nat_flags_fusion) { \ + if (la64_lbt) { \ + X64_SETJ(tmp1, I); \ + } else { \ + GETFLAGS; \ + } \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of block */ \ + i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATNO, i32); \ + } else { \ + if (la64_lbt) \ + BEQZ_safe(tmp1, i32); \ + else \ + B##NO##_safe(tmp1, i32); \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, tmp1, tmp2, tmp3); \ + jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, tmp1, tmp2, tmp3); \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATYES, i32); \ + } else { \ + if (la64_lbt) \ + BNEZ_safe(tmp1, i32); \ + else \ + B##YES##_safe(tmp1, i32); \ + } \ + } GOCOND(0x70, "J", "ib"); - #undef GO +#undef GO case 0x80: nextop = F8; switch ((nextop >> 3) & 7) { case 0: // ADD INST_NAME("ADD Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_add8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -739,7 +749,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 1: // OR INST_NAME("OR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -748,7 +758,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 2: // ADC INST_NAME("ADC Eb, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -757,7 +767,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 3: // SBB INST_NAME("SBB Eb, Ib"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -765,7 +775,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 4: // AND INST_NAME("AND Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_and8c(dyn, ninst, x1, u8, x2, x4); @@ -773,7 +783,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: // SUB INST_NAME("SUB Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -781,7 +791,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: // XOR INST_NAME("XOR Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; emit_xor8c(dyn, ninst, x1, u8, x2, x4); @@ -789,7 +799,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: // CMP INST_NAME("CMP Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; if (u8) { @@ -813,7 +823,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("ADD Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -828,7 +838,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("OR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -844,7 +854,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("ADC Ed, Ib"); } READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -861,7 +871,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SBB Ed, Ib"); } READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -877,7 +887,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("AND Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -892,7 +902,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("SUB Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -907,7 +917,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("XOR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -922,7 +932,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("CMP Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED((opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -943,7 +953,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x84: INST_NAME("TEST Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB(x1, 0); GETGB(x2); @@ -951,7 +961,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x85: INST_NAME("TEST Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED(0); @@ -1202,7 +1212,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x9D: INST_NAME("POPF"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); POP1z(xFlags); MOV32w(x1, 0x3F7FD7); AND(xFlags, xFlags, x1); @@ -1326,7 +1336,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("REPZ CMPSB"); } MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); ANDI(x1, xFlags, 1 << F_DF); BNEZ_MARK2(x1); @@ -1360,7 +1370,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; default: INST_NAME("CMPSB"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETDIR(x3, x1, 1); LD_BU(x1, xRSI, 0); LD_BU(x2, xRDI, 0); @@ -1372,7 +1382,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xA8: INST_NAME("TEST AL, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); ANDI(x1, xRAX, 0xff); u8 = F8; MOV32w(x2, u8); @@ -1380,7 +1390,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xA9: INST_NAME("TEST EAX, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x2, i64); emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); @@ -1445,7 +1455,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("REPZ SCASB"); } MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); ANDI(x1, xRAX, 0xff); ANDI(x2, xFlags, 1 << F_DF); @@ -1476,7 +1486,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; default: INST_NAME("SCASB"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETDIR(x3, x1, 1); ANDI(x1, xRAX, 0xff); LD_BU(x2, xRDI, 0); @@ -1495,7 +1505,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("REPZ SCASD"); } MAYSETFLAGS(); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); CBZ_NEXT(xRCX); if (rex.w) { MV(x1, xRAX); @@ -1530,7 +1540,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; default: INST_NAME("SCASD"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); GETDIR(x3, x1, rex.w ? 8 : 4); LDxw(x2, xRDI, 0); ADD_D(xRDI, xRDI, x3); @@ -1594,7 +1604,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEB(x1, 1); u8 = (F8) & 0x1f; if (u8) { - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); UFLAG_IF { MOV32w(x4, u8); UFLAG_OP2(x4); @@ -1613,7 +1623,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEB(x1, 1); u8 = (F8) & 0x1f; if (u8) { - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); UFLAG_IF { MOV32w(x4, u8); UFLAG_OP2(x4); @@ -1634,7 +1644,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETSEB(x1, 1); u8 = (F8) & 0x1f; if (u8) { - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); UFLAG_IF { MOV32w(x4, u8); UFLAG_OP2(x4); @@ -1662,7 +1672,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(1); F8; emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -1682,7 +1692,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(1); F8; emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -1703,7 +1713,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); // flags are not affected if count is 0, we make it a nop if possible. if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); // some flags are left undefined GETED(1); u8 = (F8) & (rex.w ? 0x3f : 0x1f); emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4, x5); @@ -1722,7 +1732,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SHR Ed, Ib"); u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); // some flags are left undefined GETED(1); u8 = (F8) & (rex.w ? 0x3f : 0x1f); emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -1742,7 +1752,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni // flags are not affected if count is 0, we make it a nop if possible. u8 = geted_ib(dyn, addr, ninst, nextop) & (rex.w ? 0x3f : 0x1f); if (u8) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); // some flags are left undefined GETED(1); u8 = (F8) & (rex.w ? 0x3f : 0x1f); emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); @@ -1763,7 +1773,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xC2: INST_NAME("RETN"); - // SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) if (box64_dynarec_safeflags) { READFLAGS(X_PEND); // lets play safe here too } @@ -1775,7 +1784,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xC3: INST_NAME("RET"); - // SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) if (box64_dynarec_safeflags) { READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost } @@ -1791,8 +1799,8 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; if (!rex.rex) { ed = (nextop & 7); - eb1 = TO_NAT((ed & 3)); // Ax, Cx, Dx or Bx - eb2 = (ed & 4) >> 2; // L or H + eb1 = TO_NAT((ed & 3)); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H } else { eb1 = TO_NAT((nextop & 7) + (rex.b << 3)); eb2 = 0; @@ -1836,7 +1844,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni POP1z(xRBP); break; case 0xCC: - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); SKIPTEST(x1); if (PK(0) == 'S' && PK(1) == 'C') { addr += 2; @@ -1903,7 +1911,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1F); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); emit_shr8(dyn, ninst, x1, x2, x5, x4, x6); @@ -1918,7 +1926,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("ROL Ed, 1"); - SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_rol32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; @@ -1926,7 +1934,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 1: INST_NAME("ROR Ed, 1"); - SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_OF | X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_ror32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; @@ -1935,21 +1943,21 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 4: case 6: INST_NAME("SHL Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED(0); emit_shl32c(dyn, ninst, rex, ed, 1, x3, x4, x5); WBACK; break; case 5: INST_NAME("SHR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED(0); emit_shr32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; break; case 7: INST_NAME("SAR Ed, 1"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETED(0); emit_sar32c(dyn, ninst, rex, ed, 1, x3, x4); WBACK; @@ -1963,7 +1971,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("ROL Ed, CL"); - SETFLAGS(X_OF | X_CF, SF_SUBSET); + SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); GETED(0); ANDI(x6, xRCX, rex.w ? 0x3f : 0x1f); emit_rol32(dyn, ninst, rex, ed, x6, x3, x4); @@ -1972,7 +1980,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 1: INST_NAME("ROR Ed, CL"); - SETFLAGS(X_OF | X_CF, SF_SUBSET); + SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); GETED(0); ANDI(x6, xRCX, rex.w ? 0x3f : 0x1f); emit_ror32(dyn, ninst, rex, ed, x6, x3, x4); @@ -1982,7 +1990,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 4: case 6: INST_NAME("SHL Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); GETED(0); if (!rex.w && MODREG) { ZEROUP(ed); } @@ -1992,7 +2000,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("SHR Ed, CL"); - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); GETED(0); if (!rex.w && MODREG) { ZEROUP(ed); } @@ -2002,7 +2010,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: INST_NAME("SAR Ed, CL"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f); GETED(0); if (!rex.w && MODREG) { ZEROUP(ed); } @@ -2018,35 +2026,35 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; - #define GO(Z) \ - BARRIER(BARRIER_MAYBE); \ - JUMP(addr + i8, 1); \ - if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ - if (Z) { \ - BNE(xRCX, xZR, i32); \ - } else { \ - BEQ(xRCX, xZR, i32); \ - } \ - if (dyn->insts[ninst].x64.jmp_insts == -1) { \ - if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - if (Z) { \ - BEQ(xRCX, xZR, i32); \ - } else { \ - BNE(xRCX, xZR, i32); \ - }; \ - } +#define GO(Z) \ + BARRIER(BARRIER_MAYBE); \ + JUMP(addr + i8, 1); \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ + if (Z) { \ + BNE(xRCX, xZR, i32); \ + } else { \ + BEQ(xRCX, xZR, i32); \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ + jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + if (Z) { \ + BEQ(xRCX, xZR, i32); \ + } else { \ + BNE(xRCX, xZR, i32); \ + }; \ + } case 0xE0: INST_NAME("LOOPNZ"); @@ -2083,7 +2091,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni i8 = F8S; GO(1); break; - #undef GO +#undef GO case 0xE8: INST_NAME("CALL Id"); @@ -2103,7 +2111,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni #endif switch (tmp) { case 3: - SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags to "dont'care" state SKIPTEST(x1); BARRIER(BARRIER_FULL); if (dyn->last_ip && (addr - dyn->last_ip < 0x1000)) { @@ -2161,7 +2169,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if ((box64_dynarec_safeflags > 1) || (ninst && dyn->insts[ninst - 1].x64.set_flags)) { READFLAGS(X_PEND); // that's suspicious } else { - SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags to "dont'care" state } // regular call /*if (box64_dynarec_callret && box64_dynarec_bigblock > 1) { @@ -2189,10 +2197,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI_D(x4, x4, j64 & 0xfff); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64 >> 2); } else { - j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; + j64 = (dyn->insts) ? (GETMARK - (dyn->native_size)) : 0; PCADDU12I(x4, ((j64 + 0x800) >> 12) & 0xfffff); ADDI_D(x4, x4, j64 & 0xfff); - MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); + MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64 >> 2); } ADDI_D(xSP, xSP, -16); ST_D(x4, xSP, 0); @@ -2201,12 +2209,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni *ok = 0; *need_epilog = 0; } - if(rex.is32bits) - j64 = (uint32_t)(addr+i32); + if (rex.is32bits) + j64 = (uint32_t)(addr + i32); else - j64 = addr+i32; + j64 = addr + i32; jump_to_next(dyn, j64, 0, ninst, rex.is32bits); - if(box64_dynarec_callret && addr >= (dyn->start + dyn->isize)) { + if (box64_dynarec_callret && addr >= (dyn->start + dyn->isize)) { // jumps out of current dynablock... MARK; j64 = getJumpTableAddress64(addr); @@ -2227,10 +2235,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("JMP Ib"); i32 = F8S; } - if(rex.is32bits) - j64 = (uint32_t)(addr+i32); + if (rex.is32bits) + j64 = (uint32_t)(addr + i32); else - j64 = addr+i32; + j64 = addr + i32; JUMP((uintptr_t)getAlternate((void*)j64), 0); if (dyn->insts[ninst].x64.jmp_insts == -1) { // out of the block @@ -2255,7 +2263,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xF4: INST_NAME("HLT"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_priv, -1); @@ -2270,7 +2278,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: case 1: INST_NAME("TEST Eb, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -2284,14 +2292,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 3: INST_NAME("NEG Eb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 0); emit_neg8(dyn, ninst, x1, x2, x4); EBBACK(); break; case 4: INST_NAME("MUL AL, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETEB(x1, 0); ANDI(x2, xRAX, 0xff); MUL_W(x1, x2, x1); @@ -2302,7 +2310,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 6: INST_NAME("DIV Eb"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEB(x1, 0); CALL(div8, -1); break; @@ -2316,7 +2324,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: case 1: INST_NAME("TEST Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(4); i64 = F32S; emit_test32c(dyn, ninst, rex, ed, i64, x3, x4, x5); @@ -2331,14 +2339,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 3: INST_NAME("NEG Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_neg32(dyn, ninst, rex, ed, x3, x4); WBACK; break; case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETED(0); if (rex.w) { if (ed == xRDX) @@ -2364,7 +2372,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETSED(0); if (rex.w) { if (ed == xRDX) @@ -2386,10 +2394,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: INST_NAME("DIV Ed"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + SET_DFNONE(); // TODO: handle zero divisor if (!rex.w) { - SET_DFNONE(); GETED(0); SLLI_D(x3, xRDX, 32); ZEROUP2(x2, xRAX); @@ -2407,7 +2415,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni && dyn->insts[ninst - 1].x64.addr && *(uint8_t*)(dyn->insts[ninst - 1].x64.addr) == 0x31 && *(uint8_t*)(dyn->insts[ninst - 1].x64.addr + 1) == 0xD2) { - SET_DFNONE(); GETED(0); DIV_DU(x2, xRAX, ed); MOD_DU(xRDX, xRAX, ed); @@ -2422,14 +2429,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DIV_DU(x2, xRAX, ed); MOD_DU(xRDX, xRAX, ed); MV(xRAX, x2); - SET_DFNONE(); } } break; case 7: INST_NAME("IDIV Ed"); SKIPTEST(x1); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); // TODO: handle zero divisor if (!rex.w) { SET_DFNONE() @@ -2479,7 +2485,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xF8: INST_NAME("CLC"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); if (la64_lbt) X64_SET_EFLAGS(xZR, X_CF); @@ -2499,14 +2505,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("INC Eb"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 0); emit_inc8(dyn, ninst, ed, x2, x4, x5); EBBACK(); break; case 1: INST_NAME("DEC Eb"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEB(x1, 0); emit_dec8(dyn, ninst, ed, x2, x4, x5); EBBACK(); @@ -2520,14 +2526,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: // INC Ed INST_NAME("INC Ed"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_inc32(dyn, ninst, rex, ed, x3, x4, x5, x6); WBACK; break; case 1: // DEC Ed INST_NAME("DEC Ed"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETED(0); emit_dec32(dyn, ninst, rex, ed, x3, x4, x5, x6); WBACK; @@ -2537,7 +2543,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni PASS2IF ((box64_dynarec_safeflags > 1) || ((ninst && dyn->insts[ninst - 1].x64.set_flags) || ((ninst > 1) && dyn->insts[ninst - 2].x64.set_flags)), 1) { READFLAGS(X_PEND); // that's suspicious } else { - SETFLAGS(X_ALL, SF_SET); // Hack to put flag in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to put flag in "don't care" state } GETEDz(0); if (box64_dynarec_callret && box64_dynarec_bigblock > 1) { @@ -2558,10 +2564,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI_D(x4, x4, j64 & 0xfff); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64 >> 2); } else { - j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; + j64 = (dyn->insts) ? (GETMARK - (dyn->native_size)) : 0; PCADDU12I(x4, ((j64 + 0x800) >> 12) & 0xfffff); ADDI_D(x4, x4, j64 & 0xfff); - MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); + MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64 >> 2); } ADDI_D(xSP, xSP, -16); ST_D(x4, xSP, 0); @@ -2569,7 +2575,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst, rex.is32bits); - if(box64_dynarec_callret && addr >= (dyn->start + dyn->isize)) { + if (box64_dynarec_callret && addr >= (dyn->start + dyn->isize)) { // jumps out of current dynablock... MARK; j64 = getJumpTableAddress64(addr); diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 725e7158..b326c71c 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -37,6 +37,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uint8_t wb1, wback, wb2, gback; uint8_t eb1, eb2; uint8_t gb1, gb2; + uint8_t tmp1, tmp2, tmp3; int32_t i32, i32_; int cacheupd = 0; int v0, v1; @@ -63,8 +64,8 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x01: // TODO:, /0 is SGDT. While 0F 01 D0 is XGETBV, etc... nextop = F8; - if(MODREG) { - switch(nextop) { + if (MODREG) { + switch (nextop) { case 0xD0: INST_NAME("XGETBV"); BEQZ_MARK(xRCX); @@ -77,7 +78,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } } else { - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { default: DEFAULT; } @@ -101,7 +102,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0B: INST_NAME("UD2"); - SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_ud, -1); @@ -171,7 +172,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; INST_NAME("MOVLPS Ex,Gx"); GETGX(v0, 0); - if(MODREG) { + if (MODREG) { v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1); VEXTRINS_D(v1, v0, 0); } else { @@ -216,7 +217,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; INST_NAME("MOVHPS Ex,Gx"); GETGX(v0, 0); - if(MODREG) { + if (MODREG) { v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1); VEXTRINS_D(v1, v0, 0x01); } else { @@ -302,7 +303,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("UCOMISS Gx, Ex"); } - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGX(d0, 0); @@ -440,35 +441,45 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; -#define GO(GETFLAGS, NO, YES, F, I) \ - READFLAGS(F); \ - if (la64_lbt) { \ - X64_SETJ(x1, I); \ - } else { \ - GETFLAGS; \ - } \ - nextop = F8; \ - GETGD; \ - if (MODREG) { \ - ed = TO_NAT((nextop & 7) + (rex.b << 3)); \ - if (la64_lbt) \ - BEQZ(x1, 8); \ - else \ - B##NO(x1, 8); \ - MV(gd, ed); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ - if (la64_lbt) \ - BEQZ(x1, 8); \ - else \ - B##NO(x1, 8); \ - LDxw(gd, ed, fixedaddress); \ - } \ +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F, I) \ + READFLAGS_FUSION(F, x1, x2, x3, x4, x5); \ + if (!dyn->insts[ninst].nat_flags_fusion) { \ + if (la64_lbt) { \ + X64_SETJ(tmp1, I); \ + } else { \ + GETFLAGS; \ + } \ + } \ + nextop = F8; \ + GETGD; \ + if (MODREG) { \ + ed = TO_NAT((nextop & 7) + (rex.b << 3)); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP(NATNO, 8); \ + } else { \ + if (la64_lbt) \ + BEQZ(tmp1, 8); \ + else \ + B##NO(tmp1, 8); \ + } \ + MV(gd, ed); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, tmp2, tmp3, &fixedaddress, rex, NULL, 1, 0); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP(NATNO, 8); \ + } else { \ + if (la64_lbt) \ + BEQZ(tmp1, 8); \ + else \ + B##NO(tmp1, 8); \ + } \ + LDxw(gd, ed, fixedaddress); \ + } \ if (!rex.w) ZEROUP(gd); GOCOND(0x40, "CMOV", "Gd, Ed"); - #undef GO +#undef GO case 0x50: INST_NAME("MOVMSPKPS Gd, Ex"); @@ -612,52 +623,64 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni VFMAX_S(v0, v0, v1); break; - #define GO(GETFLAGS, NO, YES, F, I) \ - READFLAGS(F); \ - i32_ = F32S; \ - if(rex.is32bits) \ - j64 = (uint32_t)(addr+i32_); \ - else \ - j64 = addr+i32_; \ - BARRIER(BARRIER_MAYBE); \ - JUMP(j64, 1); \ - if (la64_lbt) { \ - X64_SETJ(x1, I); \ - } else { \ - GETFLAGS; \ - } \ - if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ - if (la64_lbt) \ - BEQZ_safe(x1, i32); \ - else \ - B##NO##_safe(x1, i32); \ - if (dyn->insts[ninst].x64.jmp_insts == -1) { \ - if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, j64, 0, ninst, rex.is32bits); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ - if (la64_lbt) \ - BNEZ_safe(x1, i32); \ - else \ - B##YES##_safe(x1, i32); \ - } +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F, I) \ + READFLAGS_FUSION(F, x1, x2, x3, x4, x5); \ + i32_ = F32S; \ + if (rex.is32bits) \ + j64 = (uint32_t)(addr + i32_); \ + else \ + j64 = addr + i32_; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(j64, 1); \ + if (!dyn->insts[ninst].nat_flags_fusion) { \ + if (la64_lbt) { \ + X64_SETJ(x1, I); \ + } else { \ + GETFLAGS; \ + } \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATNO, i32); \ + } else { \ + if (la64_lbt) \ + BEQZ_safe(x1, i32); \ + else \ + B##NO##_safe(x1, i32); \ + } \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, tmp1, tmp2, tmp3); \ + jump_to_next(dyn, j64, 0, ninst, rex.is32bits); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, tmp1, tmp2, tmp3); \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + if (dyn->insts[ninst].nat_flags_fusion) { \ + NATIVEJUMP_safe(NATYES, i32); \ + } else { \ + if (la64_lbt) \ + BNEZ_safe(tmp1, i32); \ + else \ + B##YES##_safe(tmp1, i32); \ + } \ + } GOCOND(0x80, "J", "Id"); - #undef GO +#undef GO -#define GO(GETFLAGS, NO, YES, F, I) \ +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F, I) \ READFLAGS(F); \ + tmp1 = x1; \ + tmp3 = x3; \ if (la64_lbt) { \ X64_SETJ(x3, I); \ } else { \ @@ -682,7 +705,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GOCOND(0x90, "SET", "Eb"); - #undef GO +#undef GO case 0xA2: INST_NAME("CPUID"); @@ -695,7 +718,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xA3: INST_NAME("BT Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ -720,7 +743,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; INST_NAME("SHLD Ed, Gd, Ib"); if (geted_ib(dyn, addr, ninst, nextop)) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(1); GETGD; u8 = F8; @@ -733,7 +756,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xAB: INST_NAME("BTS Ed, Gd"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGD; @@ -772,7 +795,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; INST_NAME("SHRD Ed, Gd, Ib"); if (geted_ib(dyn, addr, ninst, nextop)) { - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETED(1); GETGD; u8 = F8; @@ -847,7 +870,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); if (ed != x1) { MV(x1, ed); } - MOV32w(x2, rex.w?0:1); + MOV32w(x2, rex.w ? 0 : 1); CALL((void*)fpu_xsave, -1); break; case 5: @@ -856,7 +879,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni fpu_purgecache(dyn, ninst, 0, x1, x2, x3); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); if (ed != x1) { MV(x1, ed); } - MOV32w(x2, rex.w?0:1); + MOV32w(x2, rex.w ? 0 : 1); CALL((void*)fpu_xrstor, -1); break; case 7: @@ -874,7 +897,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xAF: INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; GETED(0); @@ -956,8 +979,8 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni eb2 = 0; } else { ed = (nextop & 7); - eb1 = TO_NAT(ed & 3); // Ax, Cx, Dx or Bx - eb2 = (ed & 4) >> 2; // L or H + eb1 = TO_NAT(ed & 3); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H } BSTRPICK_D(gd, eb1, eb2 * 8 + 7, eb2 * 8); } else { @@ -984,7 +1007,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 4: INST_NAME("BT Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -997,7 +1020,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("BTS Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -1019,7 +1042,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 6: INST_NAME("BTR Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -1040,7 +1063,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: INST_NAME("BTC Ed, Ib"); - SETFLAGS(X_CF, SF_SUBSET); + SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETED(1); u8 = F8; @@ -1066,7 +1089,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xBC: INST_NAME("BSF Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); @@ -1097,7 +1120,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xBD: INST_NAME("BSR Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c index 19082933..9351017f 100644 --- a/src/dynarec/la64/dynarec_la64_64.c +++ b/src/dynarec/la64/dynarec_la64_64.c @@ -63,7 +63,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (opcode) { case 0x03: INST_NAME("ADD Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); grab_segdata(dyn, addr, ninst, x4, seg); nextop = F8; GETGD; @@ -72,7 +72,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2B: INST_NAME("SUB Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); grab_segdata(dyn, addr, ninst, x4, seg); nextop = F8; GETGD; @@ -81,7 +81,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x33: INST_NAME("XOR Gd, Seg:Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); grab_segdata(dyn, addr, ninst, x4, seg); nextop = F8; GETGD; @@ -103,7 +103,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: // ADD INST_NAME("ADD Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_add8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -112,7 +112,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 1: // OR INST_NAME("OR Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_or8c(dyn, ninst, x1, u8, x2, x4, x5); @@ -122,7 +122,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("ADC Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -132,7 +132,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SBB Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -141,7 +141,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 4: // AND INST_NAME("AND Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_and8c(dyn, ninst, x1, u8, x2, x4); @@ -150,7 +150,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 5: // SUB INST_NAME("SUB Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6); @@ -159,7 +159,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 6: // XOR INST_NAME("XOR Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; emit_xor8c(dyn, ninst, x1, u8, x2, x4); @@ -168,7 +168,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 7: // CMP INST_NAME("CMP Eb, Ib"); grab_segdata(dyn, addr, ninst, x1, seg); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEBO(x1, 1); u8 = F8; if (u8) { @@ -193,7 +193,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("ADD Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -208,7 +208,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("OR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -224,7 +224,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("ADC Ed, Ib"); } READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -246,7 +246,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SBB Ed, Ib"); } READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -262,7 +262,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("AND Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -277,7 +277,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("SUB Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -292,7 +292,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("XOR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; @@ -307,7 +307,7 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("CMP Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEDO(x6, (opcode == 0x81) ? 4 : 1); if (opcode == 0x81) i64 = F32S; diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index ef8f5c8d..18322121 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -60,7 +60,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (opcode) { case 0x01: INST_NAME("ADD Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -69,7 +69,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x03: INST_NAME("ADD Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -78,7 +78,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x05: INST_NAME("ADD AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; BSTRPICK_D(x1, xRAX, 15, 0); MOV32w(x2, i32); @@ -87,7 +87,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x09: INST_NAME("OR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -96,7 +96,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0B: INST_NAME("OR Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -105,7 +105,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x0D: INST_NAME("OR AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; BSTRPICK_D(x1, xRAX, 15, 0); MOV32w(x2, i32); @@ -122,7 +122,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x11: INST_NAME("ADC Ew, Gw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -132,7 +132,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x13: INST_NAME("ADC Gw, Ew"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -142,7 +142,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x15: INST_NAME("ADC AX, Iw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u64 = F16; BSTRPICK_D(x1, xRAX, 15, 0); MOV64x(x2, u64); @@ -152,7 +152,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x19: INST_NAME("SBB Ew, Gw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -162,7 +162,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1B: INST_NAME("SBB Gw, Ew"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -172,7 +172,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1D: INST_NAME("SBB AX, Iw"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); BSTRPICK_D(x1, xRAX, 15, 0); u64 = F16; MOV64x(x2, u64); @@ -181,7 +181,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x21: INST_NAME("AND Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -190,7 +190,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x23: INST_NAME("AND Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -199,7 +199,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x25: INST_NAME("AND AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; BSTRPICK_D(x1, xRAX, 15, 0); MOV32w(x2, i32); @@ -208,7 +208,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x29: INST_NAME("SUB Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -217,7 +217,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2B: INST_NAME("SUB Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -226,7 +226,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x2D: INST_NAME("SUB AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; BSTRPICK_D(x1, xRAX, 15, 0); MOV32w(x2, i32); @@ -235,7 +235,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x31: INST_NAME("XOR Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -244,7 +244,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x33: INST_NAME("XOR Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -253,7 +253,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x35: INST_NAME("XOR AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; BSTRPICK_D(x1, xRAX, 15, 0); MOV32w(x2, i32); @@ -262,7 +262,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x39: INST_NAME("CMP Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x2); GETEW(x1, 0); @@ -270,7 +270,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3B: INST_NAME("CMP Gw, Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGW(x1); GETEW(x2, 0); @@ -278,7 +278,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x3D: INST_NAME("CMP AX, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i32 = F16; BSTRPICK_D(x1, xRAX, 15, 0); if (i32) { @@ -304,7 +304,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("IMUL Gw,Ew,Ib"); } - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETSEW(x1, (opcode == 0x69) ? 2 : 1); if (opcode == 0x69) @@ -328,7 +328,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("ADD Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -344,7 +344,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("OR Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -361,7 +361,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("ADC Ew, Ib"); } READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -378,7 +378,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("SBB Ew, Ib"); } READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -394,7 +394,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("AND Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -410,7 +410,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("SUB Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -426,7 +426,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("XOR Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -442,7 +442,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("CMP Ew, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, (opcode == 0x81) ? 2 : 1); if (opcode == 0x81) u64 = F16; @@ -460,7 +460,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x85: INST_NAME("TEST Ew, Gw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEW(x1, 0); GETGW(x2); @@ -579,7 +579,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xA9: INST_NAME("TEST AX,Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); u16 = F16; MOV32w(x2, u16); BSTRPICK_D(x1, xRAX, 15, 0); @@ -630,7 +630,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: INST_NAME("ROL Ew, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF | X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -640,7 +640,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 1: INST_NAME("ROR Ew, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); - SETFLAGS(X_OF | X_CF, SF_SET_DF); + SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION); GETEW(x1, 1); u8 = F8; MOV32w(x2, u8); @@ -651,7 +651,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 6: INST_NAME("SHL Ew, Ib"); if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETEW(x1, 0); u8 = (F8) & 0x1f; emit_shl16c(dyn, ninst, x1, u8, x5, x4, x6); @@ -664,7 +664,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 5: INST_NAME("SHR Ew, Ib"); if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETEW(x1, 0); u8 = (F8) & 0x1f; emit_shr16c(dyn, ninst, x1, u8, x5, x4, x6); @@ -676,9 +676,8 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: INST_NAME("SAR Ew, Ib"); - SETFLAGS(X_ALL, SF_PENDING); if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined GETSEW(x1, 0); u8 = (F8) & 0x1f; emit_sar16c(dyn, ninst, x1, u8, x5, x4, x6); @@ -721,7 +720,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); GETEW(x1, 0); emit_shr16(dyn, ninst, x1, x2, x5, x4, x6); @@ -737,7 +736,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); GETEW(x1, 0); @@ -753,7 +752,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, xRCX, 0x1f); BEQ_NEXT(x2, xZR); } - SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined if (box64_dynarec_safeflags > 1) MAYSETFLAGS(); GETSEW(x1, 0); @@ -770,7 +769,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0: case 1: INST_NAME("TEST Ew, Iw"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 2); u16 = F16; MOV32w(x2, u16); @@ -778,14 +777,14 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 3: INST_NAME("NEG Ew"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); emit_neg16(dyn, ninst, ed, x2, x4); EWBACK; break; case 6: INST_NAME("DIV Ew"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETEW(x1, 0); BSTRPICK_D(x2, xRAX, 15, 0); @@ -810,7 +809,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 7: INST_NAME("IDIV Ew"); NOTEST(x1); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); GETSEW(x1, 0); if (box64_dynarec_div0) { @@ -840,14 +839,14 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("INC Ew"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); emit_inc16(dyn, ninst, x1, x2, x4, x5); EWBACK; break; case 1: INST_NAME("DEC Ew"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); GETEW(x1, 0); emit_dec16(dyn, ninst, x1, x2, x4, x5, x6); EWBACK; diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 03261d5e..90c7fdad 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -34,6 +34,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int uint8_t gd, ed; uint8_t wback, wb1, wb2, gback; uint8_t eb1, eb2; + uint8_t tmp1, tmp2, tmp3; int64_t j64; uint64_t tmp64u, tmp64u2; int v0, v1, v2; @@ -190,7 +191,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { INST_NAME("UCOMISD Gx, Ex"); } - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETGX(d0, 0); @@ -358,7 +359,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x17: INST_NAME("PTEST Gx, Ex"); nextop = F8; - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); GETGX(q0, 0); GETEX(q1, 0, 0); if (!la64_lbt) { @@ -730,8 +731,10 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } break; -#define GO(GETFLAGS, NO, YES, F, I) \ +#define GO(GETFLAGS, NO, YES, NATNO, NATYES, F, I) \ READFLAGS(F); \ + tmp1 = x1; \ + tmp3 = x3; \ if (la64_lbt) { \ X64_SETJ(x1, I); \ } else { \ @@ -1300,7 +1303,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xAF: INST_NAME("IMUL Gw,Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETSEW(x1, 0); GETSGW(x2); diff --git a/src/dynarec/la64/dynarec_la64_67.c b/src/dynarec/la64/dynarec_la64_67.c index 7ba065d6..899ed70b 100644 --- a/src/dynarec/la64/dynarec_la64_67.c +++ b/src/dynarec/la64/dynarec_la64_67.c @@ -65,7 +65,7 @@ uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x19: INST_NAME("SBB Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -75,7 +75,7 @@ uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1A: INST_NAME("SBB Gb, Eb"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETEB32(x2, 0); GETGB(x1); @@ -85,7 +85,7 @@ uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1B: INST_NAME("SBB Gd, Ed"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; GETED32(0); @@ -94,7 +94,7 @@ uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x1D: INST_NAME("SBB EAX, Id"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); i64 = F32S; MOV64xw(x2, i64); emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4, x5); @@ -162,7 +162,7 @@ uintptr_t dynarec64_67(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_PENDING, NAT_FLAGS_NOFUSION); GETED32(0); if (rex.w) { if (ed == xRDX) diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c index a9ee3fa3..b92c2e48 100644 --- a/src/dynarec/la64/dynarec_la64_emit_logic.c +++ b/src/dynarec/la64/dynarec_la64_emit_logic.c @@ -25,9 +25,10 @@ // emit XOR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s4, d_xor8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -62,9 +63,10 @@ void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit XOR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s4, d_xor8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -103,9 +105,10 @@ void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s // emit XOR16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) void emit_xor16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s4, d_xor16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -143,9 +146,10 @@ void emit_xor16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit XOR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SET_DF(s4, rex.w ? d_xor64 : d_xor32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -194,9 +198,10 @@ void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit XOR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s4, rex.w ? d_xor64 : d_xor32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -260,9 +265,10 @@ void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i // emit AND8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX(X_PEND) { SET_DF(s3, d_and8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -297,9 +303,10 @@ void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit AND8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { SET_DF(s3, d_and8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -334,9 +341,10 @@ void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s3, d_tst16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -371,9 +379,10 @@ void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit AND32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { SET_DF(s3, rex.w ? d_tst64 : d_tst32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -413,9 +422,10 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit AND32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { SET_DF(s3, rex.w ? d_tst64 : d_tst32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -459,9 +469,10 @@ void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i // emit OR16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) void emit_or16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s3, d_or16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -496,9 +507,10 @@ void emit_or16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit OR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { SET_DF(s4, rex.w?d_or64:d_or32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -541,9 +553,10 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 // emit OR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { SET_DF(s4, rex.w ? d_or64 : d_or32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -595,9 +608,10 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in // emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SET_DF(s3, d_or8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index df2e1d5d..cbdfaec2 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -25,7 +25,8 @@ // emit ADD32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { if (rex.w) { ST_D(s1, xEmu, offsetof(x64emu_t, op1)); ST_D(s2, xEmu, offsetof(x64emu_t, op2)); @@ -34,12 +35,12 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ST_W(s2, xEmu, offsetof(x64emu_t, op2)); } SET_DF(s3, rex.w ? d_add64 : d_add32b); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { + IFX (X_ALL) { if (rex.w) X64_ADD_DU(s1, s2); else @@ -48,14 +49,13 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ADDxw(s1, s1, s2); if (!rex.w) ZEROUP(s1); - IFX(X_PEND) + IFX (X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res)); return; } CLEAR_FLAGS(s3); - IFX(X_CF) - { + IFX (X_CF) { if (rex.w) { ZEROUP2(s5, s1); ZEROUP2(s4, s2); @@ -77,30 +77,25 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ORI(xFlags, xFlags, 1 << F_CF); } } - IFX(X_AF | X_OF) - { + IFX (X_AF | X_OF) { OR(s3, s1, s2); // s3 = op1 | op2 AND(s4, s1, s2); // s4 = op1 & op2 } ADDxw(s1, s1, s2); - IFX(X_PEND) - { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_AF | X_OF) - { + IFX (X_AF | X_OF) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) - IFX(X_AF) - { + IFX (X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_AF); } - IFX(X_OF) - { + IFX (X_OF) { SRLI_D(s3, s3, rex.w ? 62 : 30); SRLI_D(s4, s3, 1); XOR(s3, s3, s4); @@ -109,20 +104,17 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s ORI(xFlags, xFlags, 1 << F_OF); } } - IFX(X_SF) - { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - IFX(X_PF) - { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } - IFX(X_ZF) - { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } @@ -131,6 +123,7 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) { // special case when doing math on ESP and only PEND is needed: ignoring it! if (c >= -2048 && c < 2048) { @@ -151,12 +144,12 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w ? d_add64 : d_add32b); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { + IFX (X_ALL) { if (rex.w) X64_ADD_DU(s1, s2); else @@ -200,9 +193,8 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i if (c >= -2048 && c < 2048) { ADDIxw(s1, s1, c); } else { - IFX(X_PEND | X_AF | X_CF | X_OF) { } - else - { + IFX (X_PEND | X_AF | X_CF | X_OF) { + } else { MOV64xw(s2, c); } ADDxw(s1, s1, s2); @@ -247,41 +239,42 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i // emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, op1)); ST_B(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_add8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { + IFX (X_ALL) { X64_ADD_B(s1, s2); } ADD_D(s1, s1, s2); ANDI(s1, s1, 0xff); - IFX(X_PEND) + IFX (X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res)); return; } CLEAR_FLAGS(s3); - IFX(X_AF | X_OF) { + IFX (X_AF | X_OF) { OR(s3, s1, s2); // s3 = op1 | op2 AND(s4, s1, s2); // s4 = op1 & op2 } ADD_D(s1, s1, s2); - IFX(X_AF | X_OF) { + IFX (X_AF | X_OF) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) - IFX(X_AF) { + IFX (X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_AF); } - IFX(X_OF) { + IFX (X_OF) { SRLI_D(s3, s3, 6); SRLI_D(s4, s3, 1); XOR(s3, s3, s4); @@ -290,25 +283,25 @@ void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ORI(xFlags, xFlags, 1 << F_OF); } } - IFX(X_CF) { + IFX (X_CF) { SRLI_D(s3, s1, 8); BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_CF); } - IFX(X_PEND) { + IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, res)); } ANDI(s1, s1, 0xff); - IFX(X_ZF) { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_SF) { + IFX (X_SF) { SRLI_D(s3, s1, 7); BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_SF); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -316,48 +309,51 @@ void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { MOV32w(s4, c & 0xff); ST_B(s1, xEmu, offsetof(x64emu_t, op1)); ST_B(s4, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_add8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { - IFX(X_PEND) {} else { MOV32w(s4, c & 0xff); } + IFX (X_ALL) { + IFX (X_PEND) { + } else { + MOV32w(s4, c & 0xff); + } X64_ADD_B(s1, s4); } ADDI_D(s1, s1, c & 0xff); ANDI(s1, s1, 0xff); - IFX(X_PEND) + IFX (X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res)); return; } CLEAR_FLAGS(s3); - IFX(X_AF | X_OF) - { - IFX(X_PEND) {} else { MOV32w(s4, c & 0xff); } + IFX (X_AF | X_OF) { + IFX (X_PEND) { + } else { + MOV32w(s4, c & 0xff); + } OR(s3, s1, s4); // s3 = op1 | op2 AND(s4, s1, s4); // s4 = op1 & op2 } ADDI_D(s1, s1, c & 0xff); - IFX(X_AF | X_OF) - { + IFX (X_AF | X_OF) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) - IFX(X_AF) - { + IFX (X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_AF); } - IFX(X_OF) - { + IFX (X_OF) { SRLI_D(s3, s3, 6); SRLI_D(s4, s3, 1); XOR(s3, s3, s4); @@ -366,30 +362,25 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i ORI(xFlags, xFlags, 1 << F_OF); } } - IFX(X_CF) - { + IFX (X_CF) { SRLI_D(s3, s1, 8); BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_CF); } - IFX(X_PEND) - { + IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, res)); } ANDI(s1, s1, 0xff); - IFX(X_ZF) - { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_SF) - { + IFX (X_SF) { SRLI_D(s3, s1, 7); BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_SF); } - IFX(X_PF) - { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -397,11 +388,12 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i // emit ADD16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); ST_H(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_add16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFXA (X_AF | X_OF, !la64_lbt) { @@ -468,47 +460,48 @@ void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, op1)); ST_B(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sub8); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { + IFX (X_ALL) { X64_SUB_B(s1, s2); } SUB_D(s1, s1, s2); ANDI(s1, s1, 0xff); - IFX(X_PEND) + IFX (X_PEND) ST_H(s1, xEmu, offsetof(x64emu_t, res)); return; } CLEAR_FLAGS(s3); - IFX(X_AF | X_CF | X_OF) { + IFX (X_AF | X_CF | X_OF) { // for later flag calculation NOR(s5, xZR, s1); } SUB_D(s1, s1, s2); ANDI(s1, s1, 0xff); - IFX(X_SF) { + IFX (X_SF) { SRLI_D(s3, s1, 7); BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_SF); } - IFX(X_PEND) { + IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, res)); } CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8); - IFX(X_ZF) { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -516,18 +509,19 @@ void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i // emit SUB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5) { - MOV32w(s2, c&0xff); + MOV32w(s2, c & 0xff); emit_sub8(dyn, ninst, s1, s2, s3, s4, s5); } // emit SUB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); ST_H(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sub16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -571,16 +565,17 @@ void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, rex.w?d_sub64:d_sub32); - } else IFX(X_ALL) { + SET_DF(s3, rex.w ? d_sub64 : d_sub32); + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { + IFX (X_ALL) { if (rex.w) X64_SUB_DU(s1, s2); else @@ -589,34 +584,34 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SUBxw(s1, s1, s2); if (!rex.w) ZEROUP(s1); - IFX(X_PEND) + IFX (X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res)); return; } CLEAR_FLAGS(s3); - IFX(X_AF | X_CF | X_OF) { + IFX (X_AF | X_CF | X_OF) { // for later flag calculation NOR(s5, xZR, s1); } SUBxw(s1, s1, s2); - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_SF) { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32); - IFX(X_ZF) { + CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32); + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -624,8 +619,8 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit SUB32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5) { - if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags==X_PEND)) - { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) { // special case when doing math on RSP and only PEND is needed: ignoring it! if (c > -2048 && c <= 2048) { ADDI_D(s1, s1, -c); @@ -637,18 +632,21 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i return; } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); MOV64xw(s2, c); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, rex.w?d_sub64:d_sub32); - } else IFX(X_ALL) { + SET_DF(s3, rex.w ? d_sub64 : d_sub32); + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_PEND) {} else {MOV64xw(s2, c);} - IFX(X_ALL) { + IFX (X_PEND) { + } else { + MOV64xw(s2, c); + } + IFX (X_ALL) { if (rex.w) { X64_SUB_DU(s1, s2); } else { @@ -658,13 +656,13 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SUBxw(s1, s1, s2); if (!rex.w) ZEROUP(s1); - IFX(X_PEND) + IFX (X_PEND) SDxw(s1, xEmu, offsetof(x64emu_t, res)); return; } CLEAR_FLAGS(s3); - IFX(X_AF | X_CF | X_OF) { + IFX (X_AF | X_CF | X_OF) { // for later flag calculation NOR(s5, xZR, s1); } @@ -672,32 +670,35 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i if (c > -2048 && c <= 2048) { ADDIxw(s1, s1, -c); } else { - IFX(X_PEND) {} else {MOV64xw(s2, c);} + IFX (X_PEND) { + } else { + MOV64xw(s2, c); + } SUBxw(s1, s1, s2); } - IFX(X_AF | X_CF | X_OF) { - IFX(X_PEND) {} - else if (c > -2048 && c <= 2048) { + IFX (X_AF | X_CF | X_OF) { + IFX (X_PEND) { + } else if (c > -2048 && c <= 2048) { MOV64xw(s2, c); } } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_SF) { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32); - IFX(X_ZF) { + CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32); + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -706,11 +707,12 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i // emit SBB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch void emit_sbb8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, op1)); ST_B(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sbb8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -767,11 +769,12 @@ void emit_sbb8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s3, int s4, i // emit SBB16 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); ST_H(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sbb16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -821,11 +824,12 @@ void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit SBB32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w ? d_sbb64 : d_sbb32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -886,10 +890,11 @@ void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit NEG8 instruction, from s1, store result in s1 using s2 and s3 as scratch void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_neg8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX (X_AF | X_OF) { @@ -948,10 +953,11 @@ void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) // emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, rex.w ? d_neg64 : d_neg32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1021,11 +1027,12 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit ADC8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_adc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); ST_H(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_adc8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1104,11 +1111,12 @@ void emit_adc8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s // emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_adc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); ST_H(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_adc16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1180,11 +1188,12 @@ void emit_adc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, rex.w ? d_adc64 : d_adc32b); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1286,10 +1295,11 @@ void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit INC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_inc8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFXA (X_AF | X_OF, !la64_lbt) { @@ -1352,10 +1362,11 @@ void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_inc16); - } else IFX (X_ZF | X_OF | X_AF | X_SF | X_PF) { + } else IFXORNAT (X_ZF | X_OF | X_AF | X_SF | X_PF) { SET_DFNONE(); } IFXA (X_AF | X_OF, !la64_lbt) { @@ -1421,10 +1432,11 @@ void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit INC32 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, rex.w ? d_inc64 : d_inc32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1493,10 +1505,11 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit DEC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_B(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_dec8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFXA (X_AF | X_OF, !la64_lbt) { @@ -1560,10 +1573,11 @@ void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit DEC16 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_dec16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX (X_AF | X_OF) { @@ -1628,10 +1642,11 @@ void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, // emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, rex.w ? d_dec64 : d_dec32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFXA (X_AF | X_OF, !la64_lbt) { @@ -1701,10 +1716,11 @@ void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit NEG16 instruction, from s1, store result in s1 using s2 and s3 as scratch void emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s3, d_neg16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } IFX (X_AF | X_OF) { diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index c15c630c..d1fd7290 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -25,12 +25,13 @@ // emit SHL16 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); // s2 is not 0 here and is 1..1f/3f IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, op1)); ST_H(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, d_shl16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -92,12 +93,13 @@ void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int if (!c) return; // c != 0 + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { MOV64x(s3, c); ST_H(s3, xEmu, offsetof(x64emu_t, op2)); ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shl16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -184,17 +186,18 @@ void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); // s2 is not 0 here and is 1..1f/3f - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SDxw(s2, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { + SET_DF(s4, rex.w ? d_shl64 : d_shl32); + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (la64_lbt) { - IFX(X_ALL) { + IFX (X_ALL) { if (rex.w) X64_SLL_D(s1, s2); else @@ -202,49 +205,49 @@ void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } SLL_D(s1, s1, s2); if (!rex.w) { ZEROUP(s1); } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } return; } CLEAR_FLAGS(s3); - IFX(X_CF | X_OF) { - ADDI_D(s5, s2, rex.w?-64:-32); + IFX (X_CF | X_OF) { + ADDI_D(s5, s2, rex.w ? -64 : -32); SUB_D(s5, xZR, s5); SRL_W(s3, s1, s5); ANDI(s5, s3, 1); // LSB == F_CF - IFX(X_CF) { + IFX (X_CF) { OR(xFlags, xFlags, s5); } } SLL_D(s1, s1, s2); - IFX(X_SF) { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_ZF) { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_OF) { + IFX (X_OF) { // OF flag is affected only on 1-bit shifts ADDI_D(s3, s2, -1); BNEZ(s3, 4 + 4 * 4); - SRLIxw(s3, s1, rex.w?63:31); + SRLIxw(s3, s1, rex.w ? 63 : 31); XOR(s3, s3, s5); SLLI_D(s3, s3, F_OF); OR(xFlags, xFlags, s3); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -254,7 +257,8 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, { if (!c) return; - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { if (c) { MOV64x(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); @@ -262,14 +266,17 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); } SDxw(s1, xEmu, offsetof(x64emu_t, op1)); - SET_DF(s4, rex.w?d_shl64:d_shl32); - } else IFX(X_ALL) { + SET_DF(s4, rex.w ? d_shl64 : d_shl32); + } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if(la64_lbt) { - IFX(X_PEND) {} else { MOV64x(s3, c); } - IFX(X_ALL) { + if (la64_lbt) { + IFX (X_PEND) { + } else { + MOV64x(s3, c); + } + IFX (X_ALL) { if (rex.w) X64_SLL_D(s1, s3); else @@ -278,18 +285,18 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SLLIxw(s1, s1, c); if (!rex.w) ZEROUP(s1); - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } return; } CLEAR_FLAGS(s3); - IFX(X_CF | X_OF) { + IFX (X_CF | X_OF) { if (c > 0) { SRLI_D(s3, s1, (rex.w ? 64 : 32) - c); ANDI(s5, s3, 1); // LSB == F_CF - IFX(X_CF) { + IFX (X_CF) { OR(xFlags, xFlags, s5); } } @@ -301,21 +308,21 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SLLI_W(s1, s1, c); } - IFX(X_SF) { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_ZF) { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_OF) { + IFX (X_OF) { // OF flag is affected only on 1-bit shifts if (c == 1) { SRLIxw(s3, s1, rex.w ? 63 : 31); @@ -324,7 +331,7 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, OR(xFlags, xFlags, s3); } } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -335,11 +342,12 @@ void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i int64_t j64; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_B(s2, xEmu, offsetof(x64emu_t, op2)); ST_B(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr8); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -394,11 +402,12 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, { int64_t j64; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s2, xEmu, offsetof(x64emu_t, op2)); ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -455,12 +464,13 @@ void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int if (!c) return; // c != 0 + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { MOV64x(s3, c); ST_H(s3, xEmu, offsetof(x64emu_t, op2)); ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_shr16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -521,11 +531,12 @@ void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s int64_t j64; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w ? d_shr64 : d_shr32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -585,7 +596,8 @@ void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { if (c) { MOV64x(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); @@ -593,19 +605,22 @@ void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w ? d_shr64 : d_shr32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (!c) { - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } return; } if (la64_lbt) { - IFX(X_PEND) {} else { MOV64x(s3, c); } - IFX(X_ALL) { + IFX (X_PEND) { + } else { + MOV64x(s3, c); + } + IFX (X_ALL) { if (rex.w) X64_SRL_D(s1, s3); else @@ -614,14 +629,14 @@ void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRLIxw(s1, s1, c); if (!rex.w) ZEROUP(s1); - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } return; } CLEAR_FLAGS(s3); - IFX(X_CF) { + IFX (X_CF) { if (c > 1) { SRAI_D(s3, s1, c - 1); ANDI(s3, s3, 1); // LSB == F_CF @@ -631,7 +646,7 @@ void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } OR(xFlags, xFlags, s3); } - IFX(X_OF) { + IFX (X_OF) { // OF flag is affected only on 1-bit shifts // OF flag is set to the most-significant bit of the original operand if (c == 1) { @@ -647,21 +662,21 @@ void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRLI_W(s1, s1, c); } - IFX(X_SF) { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_ZF) { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -672,11 +687,12 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, { int64_t j64; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { ST_H(s2, xEmu, offsetof(x64emu_t, op2)); ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_sar16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -731,12 +747,13 @@ void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int if (!c) return; // c != 0 + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { MOV64x(s3, c); ST_H(s3, xEmu, offsetof(x64emu_t, op2)); ST_H(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, d_sar16); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -793,7 +810,8 @@ void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { - IFX(X_PEND) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); + IFX (X_PEND) { if (c) { MOV64x(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); @@ -801,19 +819,22 @@ void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w ? d_sar64 : d_sar32); - } else IFX(X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (!c) { - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } return; } if (la64_lbt) { - IFX(X_PEND) {} else { MOV64x(s3, c); } - IFX(X_ALL) { + IFX (X_PEND) { + } else { + MOV64x(s3, c); + } + IFX (X_ALL) { if (rex.w) X64_SRA_D(s1, s3); else @@ -822,14 +843,14 @@ void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRAIxw(s1, s1, c); if (!rex.w) ZEROUP(s1); - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } return; } CLEAR_FLAGS(s3); - IFX(X_CF) { + IFX (X_CF) { if (c > 1) { SRAI_D(s3, s1, c - 1); ANDI(s3, s3, 1); // LSB == F_CF @@ -849,21 +870,21 @@ void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } // SRAI_W sign-extends, so test sign bit before clearing upper bits - IFX(X_SF) { + IFX (X_SF) { BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { ZEROUP(s1); } - IFX(X_PEND) { + IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_ZF) { + IFX (X_ZF) { BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_PF) { + IFX (X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } } @@ -874,11 +895,12 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, { if (!c) return; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { MOV32w(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w ? d_ror64 : d_ror32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (!c) { @@ -929,12 +951,13 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) { int64_t j64; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); if (!rex.w) ZEROUP(s1); BEQ_NEXT(s2, xZR); IFX (X_PEND) { SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w ? d_rol64 : d_rol32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -982,6 +1005,7 @@ void emit_ror32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s { int64_t j64; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); if (!rex.w) { ZEROUP(s1); } @@ -989,7 +1013,7 @@ void emit_ror32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_PEND) { SDxw(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w ? d_ror64 : d_ror32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1040,11 +1064,12 @@ void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, { if (!c) return; + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { MOV32w(s3, c); SDxw(s3, xEmu, offsetof(x64emu_t, op2)); SET_DF(s4, rex.w ? d_rol64 : d_rol32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (!c) { @@ -1095,6 +1120,7 @@ void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin { c &= (rex.w ? 0x3f : 0x1f); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { if (c) { MOV64x(s3, c); @@ -1103,7 +1129,7 @@ void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w ? d_shld64 : d_shld32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } @@ -1170,6 +1196,7 @@ void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin { c &= (rex.w ? 0x3f : 0x1f); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); IFX (X_PEND) { if (c) { MOV64x(s3, c); @@ -1178,7 +1205,7 @@ void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin SDxw(xZR, xEmu, offsetof(x64emu_t, op2)); SDxw(s1, xEmu, offsetof(x64emu_t, op1)); SET_DF(s4, rex.w ? d_shrd64 : d_shrd32); - } else IFX (X_ALL) { + } else IFXORNAT (X_ALL) { SET_DFNONE(); } if (!c) { diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c b/src/dynarec/la64/dynarec_la64_emit_tests.c index 55f5ad8e..687a3f80 100644 --- a/src/dynarec/la64/dynarec_la64_emit_tests.c +++ b/src/dynarec/la64/dynarec_la64_emit_tests.c @@ -8,6 +8,7 @@ #include "dynarec.h" #include "emu/x64emu_private.h" #include "emu/x64run_private.h" +#include "la64_emitter.h" #include "x64run.h" #include "x64emu.h" #include "box64stack.h" @@ -33,6 +34,8 @@ void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SET_DFNONE(); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); if (la64_lbt) { IFX(X_ALL) { X64_SUB_B(s1, s2); @@ -42,6 +45,15 @@ void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SUB_D(s6, s1, s2); ST_B(s6, xEmu, offsetof(x64emu_t, res)); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_B(s3, s1); + EXT_W_B(s4, s2); + NAT_FLAGS_OPS(s3, s4); + } else { + NAT_FLAGS_OPS(s1, s2); + } + } return; } @@ -70,6 +82,15 @@ void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i IFX(X_PF) { emit_pf(dyn, ninst, s6, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_B(s3, s1); + EXT_W_B(s4, s2); + NAT_FLAGS_OPS(s3, s4); + } else { + NAT_FLAGS_OPS(s1, s2); + } + } } // emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch @@ -84,10 +105,20 @@ void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) SET_DFNONE(); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); if (la64_lbt) { IFX(X_ALL) { X64_SUB_B(s1, xZR); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_B(s3, s1); + NAT_FLAGS_OPS(s3, xZR); + } else { + NAT_FLAGS_OPS(s1, xZR); + } + } return; } @@ -104,6 +135,14 @@ void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_B(s3, s1); + NAT_FLAGS_OPS(s3, xZR); + } else { + NAT_FLAGS_OPS(s1, xZR); + } + } } // emit CMP16 instruction, from cmp s1, s2, using s3 and s4 as scratch @@ -117,6 +156,8 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); if (la64_lbt) { IFX(X_ALL) { X64_SUB_H(s1, s2); @@ -126,6 +167,15 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SUB_D(s6, s1, s2); ST_H(s6, xEmu, offsetof(x64emu_t, res)); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_H(s3, s1); + EXT_W_H(s4, s2); + NAT_FLAGS_OPS(s3, s4); + } else { + NAT_FLAGS_OPS(s1, s2); + } + } return; } @@ -156,6 +206,15 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX(X_PF) { emit_pf(dyn, ninst, s6, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_H(s3, s1); + EXT_W_H(s4, s2); + NAT_FLAGS_OPS(s3, s4); + } else { + NAT_FLAGS_OPS(s1, s2); + } + } } // emit CMP16 instruction, from cmp s1 , #0, using s3 and s4 as scratch @@ -170,10 +229,20 @@ void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) SET_DFNONE(); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); if (la64_lbt) { IFX(X_ALL) { X64_SUB_H(s1, xZR); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_H(s3, s1); + NAT_FLAGS_OPS(s3, xZR); + } else { + NAT_FLAGS_OPS(s1, xZR); + } + } return; } @@ -190,6 +259,14 @@ void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_H(s3, s1); + NAT_FLAGS_OPS(s3, xZR); + } else { + NAT_FLAGS_OPS(s1, xZR); + } + } } // emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch @@ -203,6 +280,8 @@ void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); if (la64_lbt) { IFX(X_ALL) { if (rex.w) @@ -215,6 +294,20 @@ void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SUBxw(s6, s1, s2); SDxw(s6, xEmu, offsetof(x64emu_t, res)); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (rex.w) + NAT_FLAGS_OPS(s1, s2); + else { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s1); + SEXT_W(s4, s2); + } else { + ZEROUP2(s3, s1); + ZEROUP2(s4, s2); + } + NAT_FLAGS_OPS(s3, s4); + } + } return; } @@ -244,6 +337,20 @@ void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PF) { emit_pf(dyn, ninst, s6, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (rex.w) + NAT_FLAGS_OPS(s1, s2); + else { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s1); + SEXT_W(s4, s2); + } else { + ZEROUP2(s3, s1); + ZEROUP2(s4, s2); + } + NAT_FLAGS_OPS(s3, s4); + } + } } // emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch @@ -258,6 +365,8 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int SET_DFNONE(); } + NAT_FLAGS_ENABLE_CARRY(); + NAT_FLAGS_ENABLE_SIGN(); if (la64_lbt) { IFX(X_ALL) { if (rex.w) @@ -265,6 +374,18 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int else X64_SUB_W(s1, xZR); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (rex.w) + NAT_FLAGS_OPS(s1, xZR); + else { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s1); + } else { + ZEROUP2(s3, s1); + } + NAT_FLAGS_OPS(s3, xZR); + } + } return; } @@ -285,6 +406,18 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); } + if (dyn->insts[ninst].nat_flags_fusion) { + if (rex.w) + NAT_FLAGS_OPS(s1, xZR); + else { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s1); + } else { + ZEROUP2(s3, s1); + } + NAT_FLAGS_OPS(s3, xZR); + } + } } @@ -297,14 +430,23 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); if (la64_lbt) { IFX(X_ALL) { X64_AND_B(s1, s2); } - IFX_PENDOR0 { + if (dyn->insts[ninst].nat_flags_fusion) { AND(s3, s1, s2); - ST_D(s3, xEmu, offsetof(x64emu_t, res)); + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_B(s3, s3); + } + } + + IFX_PENDOR0 { + if (!dyn->insts[ninst].nat_flags_fusion) AND(s3, s1, s2); + ST_B(s3, xEmu, offsetof(x64emu_t, res)); } return; } @@ -327,6 +469,9 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { + EXT_W_B(s3, s3); + } } // emit TEST16 instruction, from test s1, s2, using s3, s4 and s5 as scratch @@ -338,13 +483,22 @@ void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); if (la64_lbt) { IFX (X_ALL) { X64_AND_H(s1, s2); } - IFX_PENDOR0 { + if (dyn->insts[ninst].nat_flags_fusion) { AND(s3, s1, s2); + if (dyn->insts[ninst].nat_flags_needsign) { + EXT_W_H(s3, s3); + } + } + + IFX_PENDOR0 { + if (!dyn->insts[ninst].nat_flags_fusion) AND(s3, s1, s2); ST_H(s3, xEmu, offsetof(x64emu_t, res)); } return; @@ -368,6 +522,9 @@ void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, IFX (X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { + EXT_W_H(s3, s3); + } } // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch @@ -379,6 +536,8 @@ void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int SET_DFNONE(); } + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); if (la64_lbt) { IFX(X_ALL) { if (rex.w) @@ -387,8 +546,17 @@ void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int X64_AND_W(s1, s2); } - IFX_PENDOR0 { + if (dyn->insts[ninst].nat_flags_fusion) { AND(s3, s1, s2); + if (dyn->insts[ninst].nat_flags_needsign) { + if (!rex.w) SEXT_W(s3, s3); + } else if (!rex.w) { + ZEROUP(s3); + } + } + + IFX_PENDOR0 { + if (!dyn->insts[ninst].nat_flags_fusion) AND(s3, s1, s2); SDxw(s3, xEmu, offsetof(x64emu_t, res)); } return; @@ -415,18 +583,27 @@ void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + if (dyn->insts[ninst].nat_flags_fusion && !rex.w) { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s3); + } else { + ZEROUP(s3); + } + } } // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5) { + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); IFX_PENDOR0 { SET_DF(s3, rex.w ? d_tst64 : d_tst32); } else { SET_DFNONE(); } - + NAT_FLAGS_ENABLE_SIGN(); + if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); if (la64_lbt) { IFX(X_ALL) { MOV64xw(s3, c); @@ -436,13 +613,23 @@ void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, X64_AND_W(s1, s3); } - IFX_PENDOR0 { - if (c >= 0 && c <= 4095) { - ANDI(s3, s1, c); - } else { - IFX(X_ALL) { } else MOV64xw(s3, c); - AND(s3, s1, s3); + if (c >= 0 && c <= 4095) { + ANDI(s3, s1, c); + } else { + IFX (X_ALL) { + } else + MOV64xw(s3, c); + AND(s3, s1, s3); + } + if (dyn->insts[ninst].nat_flags_fusion) { + if (dyn->insts[ninst].nat_flags_needsign) { + if (!rex.w) SEXT_W(s3, s3); + } else if (!rex.w) { + ZEROUP(s3); } + } + + IFX_PENDOR0 { SDxw(s3, xEmu, offsetof(x64emu_t, res)); } return; @@ -471,4 +658,11 @@ void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); } + if (dyn->insts[ninst].nat_flags_fusion && !rex.w) { + if (dyn->insts[ninst].nat_flags_needsign) { + SEXT_W(s3, s3); + } else { + ZEROUP(s3); + } + } } diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index cd955b88..b158a646 100644 --- a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -55,7 +55,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (opcode) { case 0x01: INST_NAME("LOCK ADD Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -69,7 +69,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDxw(x4, x1, gd); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_add32(dyn, ninst, rex, x1, gd, x3, x4, x5); } } @@ -77,7 +77,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x09: INST_NAME("LOCK OR Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -91,7 +91,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni OR(x4, x1, gd); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_or32(dyn, ninst, rex, x1, gd, x3, x4); } SMDMB(); @@ -103,7 +103,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (rep) { case 0: INST_NAME("LOCK CMPXCHG Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; ANDI(x6, xRAX, 0xff); // AL SMDMB(); @@ -174,7 +174,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (rep) { case 0: INST_NAME("LOCK CMPXCHG Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_NOFUSION); nextop = F8; GETGD; if (MODREG) { @@ -229,7 +229,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch (rep) { case 0: INST_NAME("LOCK XADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -246,7 +246,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDxw(x4, x1, gd); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX(X_ALL | X_PEND) { + IFXORNAT (X_ALL | X_PEND) { MVxw(x2, x1); emit_add32(dyn, ninst, rex, x2, gd, x3, x4, x5); } @@ -273,7 +273,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK CMPXCHG8B Gq, Eq"); } - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); UFLAG_IF { if (la64_lbt) { @@ -402,7 +402,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x11: INST_NAME("LOCK ADC Ed, Gd"); READFLAGS(X_CF); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -418,7 +418,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDxw(x3, x3, x4); SCxw(x3, wback, 0); BEQZ_MARKLOCK(x3); - IFX (X_ALL | X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_adc32(dyn, ninst, rex, x1, gd, x3, x4, x5, x6); } } @@ -426,7 +426,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0x29: INST_NAME("LOCK SUB Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); nextop = F8; GETGD; SMDMB(); @@ -440,7 +440,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SUB_D(x4, x1, gd); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_sub32(dyn, ninst, rex, x1, gd, x3, x4, x5); } SMDMB(); @@ -456,7 +456,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK ADD Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if (MODREG) { if (opcode == 0x81) i64 = F32S; @@ -482,7 +482,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX(X_ALL | X_PEND) { + IFXORNAT (X_ALL | X_PEND) { emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); } SMDMB(); @@ -494,7 +494,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK OR Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if (MODREG) { if (opcode == 0x81) i64 = F32S; @@ -522,7 +522,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!rex.w) ZEROUP(x4); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_or32c(dyn, ninst, rex, x1, i64, x3, x4); } break; @@ -532,7 +532,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { INST_NAME("LOCK SUB Ed, Ib"); } - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); if (MODREG) { if (opcode == 0x81) i64 = F32S; @@ -558,7 +558,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); } break; @@ -594,7 +594,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("LOCK INC Ed"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); SMDMB(); if (MODREG) { ed = TO_NAT((nextop & 7) + (rex.b << 3)); @@ -606,13 +606,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDIxw(x4, x1, 1); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_inc32(dyn, ninst, rex, x1, x3, x4, x5, x6); } break; case 1: INST_NAME("LOCK DEC Ed"); - SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING, NAT_FLAGS_FUSION); SMDMB(); if (MODREG) { ed = TO_NAT((nextop & 7) + (rex.b << 3)); @@ -624,7 +624,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDIxw(x4, x1, -1); SCxw(x4, wback, 0); BEQZ_MARKLOCK(x4); - IFX (X_ALL | X_PEND) + IFXORNAT (X_ALL | X_PEND) emit_dec32(dyn, ninst, rex, x1, x3, x4, x5, x6); } break; diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index 30ce633a..3eabd50b 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -333,7 +333,7 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xB8: INST_NAME("POPCNT Gd, Ed"); - SETFLAGS(X_ALL, SF_SET); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; v1 = fpu_get_scratch(dyn); @@ -373,7 +373,7 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xBC: INST_NAME("TZCNT Gd, Ed"); - SETFLAGS(X_ZF, SF_SUBSET); + SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); nextop = F8; GETED(0); diff --git a/src/dynarec/la64/dynarec_la64_functions.c b/src/dynarec/la64/dynarec_la64_functions.c index 02735625..b12bf69d 100644 --- a/src/dynarec/la64/dynarec_la64_functions.c +++ b/src/dynarec/la64/dynarec_la64_functions.c @@ -334,7 +334,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r if (!box64_dynarec_dump && !box64_dynarec_gdbjit) return; static char buf[512]; - int length = sprintf(buf, "barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d(%d/%d)", + int length = sprintf(buf, "barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, fuse=%d, sm=%d(%d/%d)", dyn->insts[ninst].x64.barrier, dyn->insts[ninst].x64.state_flags, dyn->f.pending, @@ -345,6 +345,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dyn->insts[ninst].x64.use_flags, dyn->insts[ninst].x64.need_before, dyn->insts[ninst].x64.need_after, + dyn->insts[ninst].nat_flags_fusion, dyn->smwrite, dyn->insts[ninst].will_write, dyn->insts[ninst].last_write); if (dyn->insts[ninst].pred_sz) { length += sprintf(buf + length, ", pred="); @@ -485,3 +486,40 @@ void fpu_unwind_restore(dynarec_la64_t* dyn, int ninst, lsxcache_t* cache) { memcpy(&dyn->insts[ninst].lsx, cache, sizeof(lsxcache_t)); } + +void updateNativeFlags(dynarec_la64_t* dyn) +{ + if (!box64_dynarec_nativeflags) + return; + for (int i = 1; i < dyn->size; ++i) + if (dyn->insts[i].nat_flags_fusion) { + if (dyn->insts[i].pred_sz == 1 && dyn->insts[i].pred[0] == i - 1 + && (dyn->insts[i].x64.use_flags & dyn->insts[i - 1].x64.set_flags) == dyn->insts[i].x64.use_flags) { + dyn->insts[i - 1].nat_flags_fusion = 1; + if (dyn->insts[i].x64.use_flags & X_SF) { + dyn->insts[i - 1].nat_flags_needsign = 1; + } + dyn->insts[i].x64.use_flags = 0; + } else + dyn->insts[i].nat_flags_fusion = 0; + } +} + +void get_free_scratch(dynarec_la64_t* dyn, int ninst, uint8_t* tmp1, uint8_t* tmp2, uint8_t* tmp3, uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5) +{ + uint8_t n1 = dyn->insts[ninst].nat_flags_op1; + uint8_t n2 = dyn->insts[ninst].nat_flags_op2; + uint8_t tmp[5] = { 0 }; + int idx = 0; +#define GO(s) \ + if ((s != n1) && (s != n2)) tmp[idx++] = s + GO(s1); + GO(s2); + GO(s3); + GO(s4); + GO(s5); +#undef GO + *tmp1 = tmp[0]; + *tmp2 = tmp[1]; + *tmp3 = tmp[2]; +} \ No newline at end of file diff --git a/src/dynarec/la64/dynarec_la64_functions.h b/src/dynarec/la64/dynarec_la64_functions.h index 1fcde649..f8c8b58e 100644 --- a/src/dynarec/la64/dynarec_la64_functions.h +++ b/src/dynarec/la64/dynarec_la64_functions.h @@ -36,4 +36,6 @@ void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode); void fpu_reset(dynarec_native_t* dyn); void fpu_reset_ninst(dynarec_native_t* dyn, int ninst); +void updateNativeFlags(dynarec_la64_t* dyn); +void get_free_scratch(dynarec_la64_t* dyn, int ninst, uint8_t* tmp1, uint8_t* tmp2, uint8_t* tmp3, uint8_t s1, uint8_t s2, uint8_t s3, uint8_t s4, uint8_t s5); #endif //__DYNAREC_LA64_FUNCTIONS_H__ diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index f297fea4..e752ac96 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -712,7 +712,7 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav LD_D(xFlags, xEmu, offsetof(x64emu_t, eflags)); SPILL_EFLAGS(); } - SET_NODF(); + // SET_NODF(); dyn->last_ip = 0; } diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 630f3873..15f9bb15 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -416,10 +416,10 @@ #define MARKF2 MARKFi(1) #define GETMARKF2 GETMARKFi(1) -#define MARKSEG dyn->insts[ninst].markseg = dyn->native_size -#define GETMARKSEG dyn->insts[ninst].markseg -#define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size -#define GETMARKLOCK dyn->insts[ninst].marklock +#define MARKSEG dyn->insts[ninst].markseg = dyn->native_size +#define GETMARKSEG dyn->insts[ninst].markseg +#define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size +#define GETMARKLOCK dyn->insts[ninst].marklock #define MARKLOCK2 dyn->insts[ninst].marklock2 = dyn->native_size #define GETMARKLOCK2 dyn->insts[ninst].marklock2 @@ -553,7 +553,8 @@ #define IFX(A) if ((dyn->insts[ninst].x64.gen_flags & (A))) #define IFXA(A, B) if ((dyn->insts[ninst].x64.gen_flags & (A)) && (B)) -#define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags)) +#define IFXORNAT(A) if ((dyn->insts[ninst].x64.gen_flags & (A)) || dyn->insts[ninst].nat_flags_fusion) +#define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || (!dyn->insts[ninst].x64.gen_flags && !dyn->insts[ninst].nat_flags_fusion))) #define IFXX(A) if ((dyn->insts[ninst].x64.gen_flags == (A))) #define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B)))) #define IFXN(A, B) if ((dyn->insts[ninst].x64.gen_flags & (A) && !(dyn->insts[ninst].x64.gen_flags & (B)))) @@ -639,7 +640,7 @@ if ((width) == 8) { \ ANDI(scratch1, scratch2, 0x80); \ } else { \ - SRLI_D(scratch1, scratch2, (width)-1); \ + SRLI_D(scratch1, scratch2, (width) - 1); \ if ((width) != 64) ANDI(scratch1, scratch1, 1); \ } \ BEQZ(scratch1, 8); \ @@ -647,7 +648,7 @@ } \ IFX (X_OF) { \ /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \ - SRLI_D(scratch1, scratch2, (width)-2); \ + SRLI_D(scratch1, scratch2, (width) - 2); \ SRLI_D(scratch2, scratch1, 1); \ XOR(scratch1, scratch1, scratch2); \ ANDI(scratch1, scratch1, 1); \ @@ -678,8 +679,29 @@ } #endif +#ifndef READFLAGS_FUSION +#define READFLAGS_FUSION(A, s1, s2, s3, s4, s5) \ + if (dyn->insts[ninst].nat_flags_fusion) \ + get_free_scratch(dyn, ninst, &tmp1, &tmp2, &tmp3, s1, s2, s3, s4, s5); \ + else { \ + tmp1 = s1; \ + tmp2 = s2; \ + tmp3 = s3; \ + } \ + READFLAGS(A) +#endif + +#define NAT_FLAGS_OPS(op1, op2) \ + do { \ + dyn->insts[ninst + 1].nat_flags_op1 = op1; \ + dyn->insts[ninst + 1].nat_flags_op2 = op2; \ + } while (0) + +#define NAT_FLAGS_ENABLE_CARRY() dyn->insts[ninst].nat_flags_carry = 1 +#define NAT_FLAGS_ENABLE_SIGN() dyn->insts[ninst].nat_flags_sign = 1 + #ifndef SETFLAGS -#define SETFLAGS(A, B) \ +#define SETFLAGS(A, B, FUSION) \ if (dyn->f.pending != SF_SET \ && ((B) & SF_SUB) \ && (dyn->insts[ninst].x64.gen_flags & (~(A)))) \ @@ -687,6 +709,14 @@ if (dyn->insts[ninst].x64.gen_flags) switch (B) { \ case SF_SUBSET: \ case SF_SET: dyn->f.pending = SF_SET; break; \ + case SF_SET_DF: \ + dyn->f.pending = SF_SET; \ + dyn->f.dfnone = 1; \ + break; \ + case SF_SET_NODF: \ + dyn->f.pending = SF_SET; \ + dyn->f.dfnone = 0; \ + break; \ case SF_PENDING: dyn->f.pending = SF_PENDING; break; \ case SF_SUBSET_PENDING: \ case SF_SET_PENDING: \ @@ -694,7 +724,8 @@ break; \ } \ else \ - dyn->f.pending = SF_SET + dyn->f.pending = SF_SET; \ + dyn->insts[ninst].nat_flags_nofusion = (FUSION) #endif #ifndef JUMP #define JUMP(A, C) @@ -738,23 +769,23 @@ #define GETIP_(A) TABLE64(0, 0) #else // put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later... -#define GETIP(A) \ - if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \ - uint64_t _delta_ip = (A)-dyn->last_ip; \ - dyn->last_ip += _delta_ip; \ - if (_delta_ip) { \ - ADDI_D(xRIP, xRIP, _delta_ip); \ - } \ - } else { \ - dyn->last_ip = (A); \ - if (dyn->last_ip < 0xffffffff) { \ - MOV64x(xRIP, dyn->last_ip); \ - } else \ - TABLE64(xRIP, dyn->last_ip); \ +#define GETIP(A) \ + if (dyn->last_ip && ((A) - dyn->last_ip) < 2048) { \ + uint64_t _delta_ip = (A) - dyn->last_ip; \ + dyn->last_ip += _delta_ip; \ + if (_delta_ip) { \ + ADDI_D(xRIP, xRIP, _delta_ip); \ + } \ + } else { \ + dyn->last_ip = (A); \ + if (dyn->last_ip < 0xffffffff) { \ + MOV64x(xRIP, dyn->last_ip); \ + } else \ + TABLE64(xRIP, dyn->last_ip); \ } #define GETIP_(A) \ - if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \ - int64_t _delta_ip = (A)-dyn->last_ip; \ + if (dyn->last_ip && ((A) - dyn->last_ip) < 2048) { \ + int64_t _delta_ip = (A) - dyn->last_ip; \ if (_delta_ip) { ADDI_D(xRIP, xRIP, _delta_ip); } \ } else { \ if ((A) < 0xffffffff) { \ @@ -862,16 +893,16 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_and32 STEPNAME(emit_and32) #define emit_and32c STEPNAME(emit_and32c) #define emit_shl16 STEPNAME(emit_shl16) -#define emit_shl16c STEPNAME(emit_shl16c) +#define emit_shl16c STEPNAME(emit_shl16c) #define emit_shl32 STEPNAME(emit_shl32) #define emit_shl32c STEPNAME(emit_shl32c) #define emit_shr8 STEPNAME(emit_shr8) #define emit_shr16 STEPNAME(emit_shr16) -#define emit_shr16c STEPNAME(emit_shr16c) +#define emit_shr16c STEPNAME(emit_shr16c) #define emit_shr32 STEPNAME(emit_shr32) #define emit_shr32c STEPNAME(emit_shr32c) #define emit_sar16 STEPNAME(emit_sar16) -#define emit_sar16c STEPNAME(emit_sar16c) +#define emit_sar16c STEPNAME(emit_sar16c) #define emit_sar32c STEPNAME(emit_sar32c) #define emit_shld32c STEPNAME(emit_shld32c) #define emit_shrd32c STEPNAME(emit_shrd32c) @@ -1054,86 +1085,96 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int #define MAYUSE(A) #endif -#define GOCOND(B, T1, T2) \ - case B + 0x0: \ - INST_NAME(T1 "O " T2); \ - GO(ANDI(x1, xFlags, 1 << F_OF), EQZ, NEZ, X_OF, X64_JMP_JO); \ - break; \ - case B + 0x1: \ - INST_NAME(T1 "NO " T2); \ - GO(ANDI(x1, xFlags, 1 << F_OF), NEZ, EQZ, X_OF, X64_JMP_JNO); \ - break; \ - case B + 0x2: \ - INST_NAME(T1 "C " T2); \ - GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, X_CF, X64_JMP_JC); \ - break; \ - case B + 0x3: \ - INST_NAME(T1 "NC " T2); \ - GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, X_CF, X64_JMP_JNC); \ - break; \ - case B + 0x4: \ - INST_NAME(T1 "Z " T2); \ - GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, X_ZF, X64_JMP_JZ); \ - break; \ - case B + 0x5: \ - INST_NAME(T1 "NZ " T2); \ - GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, X_ZF, X64_JMP_JNZ); \ - break; \ - case B + 0x6: \ - INST_NAME(T1 "BE " T2); \ - GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), EQZ, NEZ, X_CF | X_ZF, X64_JMP_JBE); \ - break; \ - case B + 0x7: \ - INST_NAME(T1 "NBE " T2); \ - GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, X_CF | X_ZF, X64_JMP_JNBE); \ - break; \ - case B + 0x8: \ - INST_NAME(T1 "S " T2); \ - GO(ANDI(x1, xFlags, 1 << F_SF), EQZ, NEZ, X_SF, X64_JMP_JS); \ - break; \ - case B + 0x9: \ - INST_NAME(T1 "NS " T2); \ - GO(ANDI(x1, xFlags, 1 << F_SF), NEZ, EQZ, X_SF, X64_JMP_JNS); \ - break; \ - case B + 0xA: \ - INST_NAME(T1 "P " T2); \ - GO(ANDI(x1, xFlags, 1 << F_PF), EQZ, NEZ, X_PF, X64_JMP_JP); \ - break; \ - case B + 0xB: \ - INST_NAME(T1 "NP " T2); \ - GO(ANDI(x1, xFlags, 1 << F_PF), NEZ, EQZ, X_PF, X64_JMP_JNP); \ - break; \ - case B + 0xC: \ - INST_NAME(T1 "L " T2); \ - GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_SF), EQZ, NEZ, X_SF | X_OF, X64_JMP_JL); \ - break; \ - case B + 0xD: \ - INST_NAME(T1 "GE " T2); \ - GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_SF), NEZ, EQZ, X_SF | X_OF, X64_JMP_JGE); \ - break; \ - case B + 0xE: \ - INST_NAME(T1 "LE " T2); \ - GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_SF); \ - ANDI(x3, xFlags, 1 << F_ZF); \ - OR(x1, x1, x3); \ - ANDI(x1, x1, (1 << F_SF) | (1 << F_ZF)), EQZ, NEZ, X_SF | X_OF | X_ZF, X64_JMP_JLE); \ - break; \ - case B + 0xF: \ - INST_NAME(T1 "G " T2); \ - GO(SRLI_D(x1, xFlags, F_OF - F_SF); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1 << F_SF); \ - ANDI(x3, xFlags, 1 << F_ZF); \ - OR(x1, x1, x3); \ - ANDI(x1, x1, (1 << F_SF) | (1 << F_ZF)), NEZ, EQZ, X_SF | X_OF | X_ZF, X64_JMP_JG); \ +#define GOCOND(B, T1, T2) \ + case B + 0x0: \ + INST_NAME(T1 "O " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_OF), EQZ, NEZ, _, _, X_OF, X64_JMP_JO); \ + break; \ + case B + 0x1: \ + INST_NAME(T1 "NO " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_OF), NEZ, EQZ, _, _, X_OF, X64_JMP_JNO); \ + break; \ + case B + 0x2: \ + INST_NAME(T1 "C " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_CF), EQZ, NEZ, GEU, LTU, X_CF, X64_JMP_JC); \ + break; \ + case B + 0x3: \ + INST_NAME(T1 "NC " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_CF), NEZ, EQZ, LTU, GEU, X_CF, X64_JMP_JNC); \ + break; \ + case B + 0x4: \ + INST_NAME(T1 "Z " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_ZF), EQZ, NEZ, NE, EQ, X_ZF, X64_JMP_JZ); \ + break; \ + case B + 0x5: \ + INST_NAME(T1 "NZ " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_ZF), NEZ, EQZ, EQ, NE, X_ZF, X64_JMP_JNZ); \ + break; \ + case B + 0x6: \ + INST_NAME(T1 "BE " T2); \ + GO(ANDI(tmp1, xFlags, (1 << F_CF) | (1 << F_ZF)), EQZ, NEZ, GTU, LEU, X_CF | X_ZF, X64_JMP_JBE); \ + break; \ + case B + 0x7: \ + INST_NAME(T1 "NBE " T2); \ + GO(ANDI(tmp1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, LEU, GTU, X_CF | X_ZF, X64_JMP_JNBE); \ + break; \ + case B + 0x8: \ + INST_NAME(T1 "S " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_SF), EQZ, NEZ, _, _, X_SF, X64_JMP_JS); \ + break; \ + case B + 0x9: \ + INST_NAME(T1 "NS " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_SF), NEZ, EQZ, _, _, X_SF, X64_JMP_JNS); \ + break; \ + case B + 0xA: \ + INST_NAME(T1 "P " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_PF), EQZ, NEZ, _, _, X_PF, X64_JMP_JP); \ + break; \ + case B + 0xB: \ + INST_NAME(T1 "NP " T2); \ + GO(ANDI(tmp1, xFlags, 1 << F_PF), NEZ, EQZ, _, _, X_PF, X64_JMP_JNP); \ + break; \ + case B + 0xC: \ + INST_NAME(T1 "L " T2); \ + GO(SRLI_D(tmp1, xFlags, F_OF - F_SF); \ + XOR(tmp1, tmp1, xFlags); \ + ANDI(tmp1, tmp1, 1 << F_SF), EQZ, NEZ, GE, LT, X_SF | X_OF, X64_JMP_JL); \ + break; \ + case B + 0xD: \ + INST_NAME(T1 "GE " T2); \ + GO(SRLI_D(tmp1, xFlags, F_OF - F_SF); \ + XOR(tmp1, tmp1, xFlags); \ + ANDI(tmp1, tmp1, 1 << F_SF), NEZ, EQZ, LT, GE, X_SF | X_OF, X64_JMP_JGE); \ + break; \ + case B + 0xE: \ + INST_NAME(T1 "LE " T2); \ + GO(SRLI_D(tmp1, xFlags, F_OF - F_SF); \ + XOR(tmp1, tmp1, xFlags); \ + ANDI(tmp1, tmp1, 1 << F_SF); \ + ANDI(tmp3, xFlags, 1 << F_ZF); \ + OR(tmp1, tmp1, tmp3); \ + ANDI(tmp1, tmp1, (1 << F_SF) | (1 << F_ZF)), EQZ, NEZ, GT, LE, X_SF | X_OF | X_ZF, X64_JMP_JLE); \ + break; \ + case B + 0xF: \ + INST_NAME(T1 "G " T2); \ + GO(SRLI_D(tmp1, xFlags, F_OF - F_SF); \ + XOR(tmp1, tmp1, xFlags); \ + ANDI(tmp1, tmp1, 1 << F_SF); \ + ANDI(tmp3, xFlags, 1 << F_ZF); \ + OR(tmp1, tmp1, tmp3); \ + ANDI(tmp1, tmp1, (1 << F_SF) | (1 << F_ZF)), NEZ, EQZ, LE, GT, X_SF | X_OF | X_ZF, X64_JMP_JG); \ break +// Dummy macros +#define B__safe(a, b, c) XOR(xZR, xZR, xZR) +#define B_(a, b, c) XOR(xZR, xZR, xZR) + +#define NATIVEJUMP_safe(COND, val) \ + B##COND##_safe(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val); + +#define NATIVEJUMP(COND, val) \ + B##COND(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val); + #define NOTEST(s1) \ if (box64_dynarec_test) { \ ST_W(xZR, xEmu, offsetof(x64emu_t, test.test)); \ diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h index 3ed6c608..a35c9669 100644 --- a/src/dynarec/la64/dynarec_la64_pass0.h +++ b/src/dynarec/la64/dynarec_la64_pass0.h @@ -10,11 +10,25 @@ dyn->insts[ninst].x64.use_flags = A; \ dyn->f.dfnone = 1; \ dyn->f.pending = SF_SET -#define SETFLAGS(A, B) \ - dyn->insts[ninst].x64.set_flags = A; \ - dyn->insts[ninst].x64.state_flags = B; \ - dyn->f.pending = (B) & SF_SET_PENDING; \ - dyn->f.dfnone = ((B) & SF_SET) ? 1 : 0; + +#define READFLAGS_FUSION(A, s1, s2, s3, s4, s5) \ + if (box64_dynarec_nativeflags && ninst > 0 && !dyn->insts[ninst - 1].nat_flags_nofusion) { \ + if ((A) == (X_ZF)) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + else if (dyn->insts[ninst - 1].nat_flags_carry && ((A) == (X_CF) || (A) == (X_CF | X_ZF))) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + else if (dyn->insts[ninst - 1].nat_flags_sign && ((A) == (X_SF | X_OF) || (A) == (X_SF | X_OF | X_ZF))) \ + dyn->insts[ninst].nat_flags_fusion = 1; \ + } \ + READFLAGS(A); + +#define SETFLAGS(A, B, FUSION) \ + dyn->insts[ninst].x64.set_flags = A; \ + dyn->insts[ninst].x64.state_flags = (B) & ~SF_DF; \ + dyn->f.pending = (B) & SF_SET_PENDING; \ + dyn->f.dfnone = ((B) & SF_SET) ? (((B) == SF_SET_NODF) ? 0 : 1) : 0; \ + dyn->insts[ninst].nat_flags_nofusion = (FUSION) + #define EMIT(A) dyn->native_size += 4 #define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C; dyn->insts[ninst].x64.jmp_insts = 0 #define BARRIER(A) \ diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 4f57d366..49951c3e 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -93,6 +93,13 @@ typedef struct instruction_la64_s { uint8_t last_write; uint8_t lock; uint8_t df_notneeded; + uint8_t nat_flags_fusion:1; + uint8_t nat_flags_nofusion:1; + uint8_t nat_flags_carry:1; + uint8_t nat_flags_sign:1; + uint8_t nat_flags_needsign:1; + uint8_t nat_flags_op1; + uint8_t nat_flags_op2; flagcache_t f_exit; // flags status at end of instruction lsxcache_t lsx; // lsxcache at end of instruction (but before poping) flagcache_t f_entry; // flags status before the instruction begin diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index cb5393a4..2d48879c 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -568,6 +568,11 @@ f24-f31 fs0-fs7 Static registers Callee // PC = PC + SignExtend({imm21, 2'b0}, GRLEN) #define BNEZ(rj, imm23) EMIT(type_1RI21(0b010001, ((imm23) >> 2), rj)) +#define BGT(rj, rd, imm13) BLT(rd, rj, imm13) +#define BLE(rj, rd, imm13) BGE(rd, rj, imm13) +#define BGTU(rj, rd, imm13) BLTU(rd, rj, imm13) +#define BLEU(rj, rd, imm13) BGEU(rd, rj, imm13) + #define BCEQZ(cj, imm23) EMIT(type_1RI21(0b010010, ((imm23) >> 2), 0b00000 | cj)) #define BCNEZ(cj, imm23) EMIT(type_1RI21(0b010010, ((imm23) >> 2), 0b01000 | cj)) @@ -583,7 +588,7 @@ f24-f31 fs0-fs7 Static registers Callee #define B__(reg1, reg2, imm28) B(imm28) #define BEQ_safe(rj, rd, imm) \ - if { \ + do { \ if ((imm) > -0x20000 && (imm) < 0x20000) { \ BEQ(rj, rd, imm); \ NOP(); \ @@ -591,8 +596,7 @@ f24-f31 fs0-fs7 Static registers Callee BNE(rj, rd, 8); \ B(imm - 4); \ } \ - } \ - while (0) + } while (0) #define BNE_safe(rj, rd, imm) \ do { \ @@ -649,6 +653,50 @@ f24-f31 fs0-fs7 Static registers Callee } \ } while (0) +#define BGT_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BGT(rj, rd, imm); \ + NOP(); \ + } else { \ + BLE(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) + +#define BLE_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BLE(rj, rd, imm); \ + NOP(); \ + } else { \ + BGT(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) + +#define BGTU_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BGTU(rj, rd, imm); \ + NOP(); \ + } else { \ + BLEU(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) + +#define BLEU_safe(rj, rd, imm) \ + do { \ + if ((imm) > -0x20000 && (imm) < 0x20000) { \ + BLEU(rj, rd, imm); \ + NOP(); \ + } else { \ + BGTU(rj, rd, 8); \ + B(imm - 4); \ + } \ + } while (0) + #define BEQZ_safe(rj, imm) \ do { \ if ((imm) > -0x70000 && (imm) < 0x70000) { \ diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index bc314bd6..62f8ffdb 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -432,27 +432,27 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) return buff; } if (isMask(opcode, "010110iiiiiiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BEQ", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BEQ", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18)); return buff; } if (isMask(opcode, "010111iiiiiiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BNE", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BNE", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18)); return buff; } if (isMask(opcode, "011000iiiiiiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BLT", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BLT", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18)); return buff; } if (isMask(opcode, "011001iiiiiiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BGE", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BGE", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18)); return buff; } if (isMask(opcode, "011010iiiiiiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BLTU", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BLTU", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18)); return buff; } if (isMask(opcode, "011011iiiiiiiiiiiiiiiijjjjjddddd", &a)) { - snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BGEU", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); + snprintf(buff, sizeof(buff), "%-15s %s, %s, %ld", "BGEU", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18)); return buff; } if (isMask(opcode, "010000iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) { diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c index 6783079b..b122736c 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c @@ -405,10 +405,12 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int } NAT_FLAGS_ENABLE_SIGN(); - if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { - if (!rex.w) { + if (dyn->insts[ninst].nat_flags_fusion && !rex.w) { + if (dyn->insts[ninst].nat_flags_needsign) { SLLI(s3, s3, 32); SRAI(s3, s3, 32); + } else { + ZEROUP(s3); } } } @@ -451,10 +453,12 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, } NAT_FLAGS_ENABLE_SIGN(); - if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) { - if (!rex.w) { + if (dyn->insts[ninst].nat_flags_fusion && !rex.w) { + if (dyn->insts[ninst].nat_flags_needsign) { SLLI(s3, s3, 32); SRAI(s3, s3, 32); + } else { + ZEROUP(s3); } } } diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index 247d821e..5cbd421c 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -768,7 +768,7 @@ void internal_ApplyParams(const char* name, const my_params_t* param) { box64_dynarec_x87double = 1; box64_dynarec_div0 = 1; box64_dynarec_callret = 0; - #ifdef RV64 + #if defined(RV64) || defined(LA64) box64_dynarec_nativeflags = 0; #endif } |