| author    | ptitSeb <sebastien.chev@gmail.com> | 2024-10-29 13:09:51 +0100 |
|-----------|------------------------------------|---------------------------|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-10-29 13:09:51 +0100 |
| commit    | d99d18759bf7d1e2e83b5c263f7f915ec8e7b7d0 (patch)  |           |
| tree      | 2c47bdb449c93f605849d2e78e4e09afd6ab5a13 /src     |           |
| parent    | 68c3be3e16e529521e35c7852820db5ee251281b (diff)   |           |
[ARM64_DYNAREC] Also use Native Carry flags directly when possible
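
For context: x86 and ARM64 agree on the carry convention after an addition (CF and C are both the carry-out), but disagree after a subtraction — x86 sets CF when a borrow occurs, while ARM64's SUBS sets C when no borrow occurs. Instead of eagerly converting after every SUBS/CMP, this commit lets the dynarec keep the carry in NZCV in its native, possibly inverted form (tracked by the new normal_carry/gen_inverted_carry/invert_carry fields in the diff below) and reconcile only when a consumer needs the x86 convention. A minimal C sketch of the two conventions (illustrative only, not code from the commit; the helper name is made up):

#include <stdint.h>
#include <stdio.h>

static void carry_conventions(uint32_t a, uint32_t b)
{
    uint64_t sum = (uint64_t)a + b;
    int x86_cf_add = (int)((sum >> 32) & 1);   // x86 CF after ADD: carry-out
    int arm_c_add  = (int)((sum >> 32) & 1);   // ARM64 C after ADDS: same bit
    int x86_cf_sub = (a < b);                  // x86 CF after SUB: borrow happened
    int arm_c_sub  = !(a < b);                 // ARM64 C after SUBS: NOT borrow
    printf("add: x86 CF=%d  arm C=%d\n", x86_cf_add, arm_c_add);
    printf("sub: x86 CF=%d  arm C=%d (inverted)\n", x86_cf_sub, arm_c_sub);
}

int main(void)
{
    carry_conventions(1, 2);   // subtraction borrows: x86 CF=1, ARM64 C=0
    carry_conventions(3, 2);   // no borrow:           x86 CF=0, ARM64 C=1
    return 0;
}

Because the two conventions differ only by an inversion, recording whether the value currently held in NZCV.C is "normal" or inverted is enough to defer the conversion until it is actually needed.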
Diffstat (limited to 'src')
| src/dynarec/arm64/arm64_emitter.h            |   4 |
| src/dynarec/arm64/dynarec_arm64_660f.c       |   4 |
| src/dynarec/arm64/dynarec_arm64_67.c         |   4 |
| src/dynarec/arm64/dynarec_arm64_emit_logic.c | 126 |
| src/dynarec/arm64/dynarec_arm64_emit_math.c  | 156 |
| src/dynarec/arm64/dynarec_arm64_emit_tests.c |  48 |
| src/dynarec/arm64/dynarec_arm64_f30f.c       |   4 |
| src/dynarec/arm64/dynarec_arm64_functions.c  | 242 |
| src/dynarec/arm64/dynarec_arm64_functions.h  |   2 |
| src/dynarec/arm64/dynarec_arm64_helper.c     |  50 |
| src/dynarec/arm64/dynarec_arm64_helper.h     |  37 |
| src/dynarec/arm64/dynarec_arm64_pass0.h      |  10 |
| src/dynarec/arm64/dynarec_arm64_private.h    |  13 |

13 files changed, 443 insertions(+), 257 deletions(-)
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index f5f0cc96..ae337ac6 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -820,9 +820,9 @@ int convert_bitmask(uint64_t bitmask);
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
 // mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0
 // MRS : from System register
-#define MRS_nzvc(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
+#define MRS_nzcv(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
 // MSR : to System register
-#define MSR_nzvc(Rt) FEMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
+#define MSR_nzcv(Rt) FEMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
 // mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0
 #define MRS_fpcr(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt))
 #define MSR_fpcr(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt))
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 2ddb2212..41377f4d 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -923,9 +923,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             SETFLAGS(X_CF, SF_SUBSET);
             GETED(0);
             GETGD;
-            MRS_nzvc(x3);
+            MRS_nzcv(x3);
             BFIx(x3, xFlags, 29, 1);    // set C
-            MSR_nzvc(x3);               // load CC into ARM CF
+            MSR_nzcv(x3);               // load CC into ARM CF
             IFX(X_CF) {
                 ADCSxw_REG(gd, gd, ed);
                 CSETw(x3, cCS);
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 94202ba1..358825c1 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -234,10 +234,10 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_OF, SF_SUBSET);
             GETED32(0);
             GETGD;
-            MRS_nzvc(x3);
+            MRS_nzcv(x3);
             LSRw(x4, xFlags, F_OF);
             BFIx(x3, x4, 29, 1);    // set C
-            MSR_nzvc(x3);           // load CC into ARM CF
+            MSR_nzcv(x3);           // load CC into ARM CF
             IFX(X_OF) {
                 ADCSxw_REG(gd, gd, ed);
                 CSETw(x3, cCS);
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
index 5b9a08f2..8e6b9bff 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
@@ -38,18 +38,21 @@ void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3,
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
            CSETw(s3, cEQ);
@@ -89,18 +92,21 @@ void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -135,18 +141,21 @@ void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -186,18 +195,21 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -224,7 +236,7 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    IFX(X_ZF|X_SF) {
+    IFX(X_ZF|X_SF|X_CF|X_OF) {
         ANDSxw_REG(s1, s1, s2);
     } else {
         ANDxw_REG(s1, s1, s2);
@@ -232,17 +244,19 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -286,17 +300,19 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index ac927c72..4478b1ab 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -57,8 +57,10 @@ void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         }
     }
     IFX(X_CF) {
-        CSETw(s4, cCS);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {} else {
+            CSETw(s4, cCS);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -135,8 +137,10 @@ void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         }
     }
     IFX(X_CF) {
-        CSETw(s4, cCS);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {} else {
+            CSETw(s4, cCS);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -192,8 +196,12 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
     IFX(X_CF) {
         // inverted carry
-        CSETw(s4, cCC);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s4, cCC);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -271,8 +279,12 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     }
     IFX(X_CF) {
         // inverted carry
-        CSETw(s4, cCC);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s4, cCC);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -980,9 +992,21 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_AF) {
         MOVxw_REG(s4, s1);
     }
-    MRS_nzvc(s3);
-    BFIx(s3, xFlags, 29, 1);    // set C
-    MSR_nzvc(s3);               // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);    //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        MRS_nzcv(s3);
+        BFIx(s3, xFlags, 29, 1);    // set C
+        MSR_nzcv(s3);               // load CC into ARM CF
+    }
     IFX(X_ZF|X_CF|X_OF|X_SF) {
         ADCSxw_REG(s1, s1, s2);
     } else {
@@ -1006,8 +1030,10 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         }
     }
     IFX(X_CF) {
-        CSETw(s3, cCS);
-        BFIw(xFlags, s3, F_CF, 1);
+        IFNATIVE(NF_CF) {} else {
+            CSETw(s3, cCS);
+            BFIw(xFlags, s3, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -1110,9 +1136,21 @@ void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_AF | X_OF) {
         MOVw_REG(s4, s1);
     }
-    MRS_nzvc(s3);
-    BFIx(s3, xFlags, 29, 1);    // set C
-    MSR_nzvc(s3);               // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);    //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        MRS_nzcv(s3);
+        BFIx(s3, xFlags, 29, 1);    // set C
+        MSR_nzcv(s3);               // load CC into ARM CF
+    }
     ADCw_REG(s1, s1, s2);
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -1164,9 +1202,21 @@ void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_AF | X_OF) {
         MOVw_REG(s4, s1);
    }
-    MRS_nzvc(s3);
-    BFIx(s3, xFlags, 29, 1);    // set C
-    MSR_nzvc(s3);               // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);    //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        MRS_nzcv(s3);
+        BFIx(s3, xFlags, 29, 1);    // set C
+        MSR_nzcv(s3);               // load CC into ARM CF
+    }
     ADCw_REG(s1, s1, s2);
     IFX(X_PEND) {
         STRw_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -1274,10 +1324,22 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    EORw_mask(s4, xFlags, 0, 0);    // invert CC because it's reverted for SUB on ARM
-    MRS_nzvc(s3);
-    BFIx(s3, s4, 29, 1);    // set C
-    MSR_nzvc(s3);           // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(!INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);    //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        EORw_mask(s4, xFlags, 0, 0);    // invert CC because it's reverted for SUB on ARM
+        MRS_nzcv(s3);
+        BFIx(s3, s4, 29, 1);    // set C
+        MSR_nzcv(s3);           // load CC into ARM CF
+    }
     IFX(X_AF) {
         MVNxw_REG(s4, s1);
     }
@@ -1305,8 +1367,12 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
     IFX(X_CF) {
         // Inverted carry
-        CSETw(s3, cCC);
-        BFIw(xFlags, s3, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s3, cCC);
+            BFIw(xFlags, s3, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -1408,10 +1474,22 @@ void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    EORw_mask(s4, xFlags, 0, 0);    // invert CC because it's reverted for SUB on ARM
-    MRS_nzvc(s3);
-    BFIx(s3, s4, 29, 1);    // set C
-    MSR_nzvc(s3);           // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(!INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);    //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        EORw_mask(s4, xFlags, 0, 0);    // invert CC because it's reverted for SUB on ARM
+        MRS_nzcv(s3);
+        BFIx(s3, s4, 29, 1);    // set C
+        MSR_nzcv(s3);           // load CC into ARM CF
+    }
     IFX(X_AF|X_OF|X_CF) {
         MVNw_REG(s4, s1);
     }
@@ -1463,10 +1541,22 @@ void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    EORw_mask(s4, xFlags, 0, 0);    // invert CC because it's reverted for SUB on ARM
-    MRS_nzvc(s3);
-    BFIx(s3, s4, 29, 1);    // set C, bit 29
-    MSR_nzvc(s3);           // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(!INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);    //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        EORw_mask(s4, xFlags, 0, 0);    // invert CC because it's reverted for SUB on ARM
+        MRS_nzcv(s3);
+        BFIx(s3, s4, 29, 1);    // set C, bit 29
+        MSR_nzcv(s3);           // load CC into ARM CF
+    }
     IFX(X_AF|X_OF|X_CF) {
         MVNw_REG(s4, s1);
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
index 51c9f1bf..1b5184f4 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
@@ -54,8 +54,12 @@ void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
     IFX(X_CF) {
         // inverted carry
-        CSETw(s4, cCC);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s4, cCC);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -89,17 +93,21 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int
     SUBSxw_U12(s3, s1, 0);  // res = s1 - 0
     // and now the tricky ones (and mostly unused), PF and AF
     // bc = (res & (~d | s)) | (~d & s) => is 0 here...
-    IFX(X_OF|X_AF|X_CF) {
-        IFXNATIVE(X_OF, NF_VF) {
-            IFX(X_AF|X_CF) {
-                MOV32w(s4, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s4);
-            }
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
         } else {
-            MOV32w(s4, (1<<F_OF)|(1<<F_AF)|(1<<F_CF));
-            BICw(xFlags, xFlags, s4);
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s4, cEQ);
@@ -253,17 +261,19 @@ void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     } else {
         SET_DFNONE(s4);
     }
-    IFX(X_CF | X_AF | X_OF) {
-        IFXNATIVE(X_OF, NF_VF) {
-            IFX(X_AF|X_CF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     ANDSxw_REG(s3, s1, s2);     // res = s1 & s2
     IFX_PENDOR0 {
         STRxw_U12(s3, xEmu, offsetof(x64emu_t, res));
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index ce73f08f..752595fd 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -184,10 +184,10 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             SETFLAGS(X_OF, SF_SUBSET);
             GETED(0);
             GETGD;
-            MRS_nzvc(x3);
+            MRS_nzcv(x3);
             LSRw(x4, xFlags, F_OF);
             BFIx(x3, x4, 29, 1);    // set C
-            MSR_nzvc(x3);           // load CC into ARM CF
+            MSR_nzcv(x3);           // load CC into ARM CF
             IFX(X_OF) {
                 ADCSxw_REG(gd, gd, ed);
                 CSETw(x3, cCS);
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index bc41ebd9..e2456ee6 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -686,6 +686,14 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
     }
     if(dyn->insts[ninst].use_nat_flags || dyn->insts[ninst].set_nat_flags || dyn->insts[ninst].need_nat_flags)
         printf_log(LOG_NONE, " nf:%hhx/%hhx/%hhx", dyn->insts[ninst].set_nat_flags, dyn->insts[ninst].use_nat_flags, dyn->insts[ninst].need_nat_flags);
+    if(dyn->insts[ninst].invert_carry)
+        printf_log(LOG_NONE, " CI");
+    if(dyn->insts[ninst].gen_inverted_carry)
+        printf_log(LOG_NONE, " gic");
+    if(dyn->insts[ninst].before_nat_flags&NF_CF)
+        printf_log(LOG_NONE, " %ccb", dyn->insts[ninst].normal_carry_before?'n':'i');
+    if(dyn->insts[ninst].need_nat_flags&NF_CF)
+        printf_log(LOG_NONE, " %cc", dyn->insts[ninst].normal_carry?'n':'i');
     if(dyn->insts[ninst].pred_sz) {
         dynarec_log(LOG_NONE, ", pred=");
         for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)
@@ -806,15 +814,18 @@ int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st)
 }
 
-uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag)
+uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag, int before)
 {
-    if(dyn->insts[ninst].x64.set_flags) {
+    if(dyn->insts[ninst].x64.set_flags && !before) {
         dyn->insts[ninst].set_nat_flags |= flag;
         if(dyn->insts[ninst].x64.use_flags) {
             dyn->insts[ninst].use_nat_flags |= flag;
         }
     } else {
-        dyn->insts[ninst].use_nat_flags |= flag;
+        if(before)
+            dyn->insts[ninst].use_nat_flags_before |= flag;
+        else
+            dyn->insts[ninst].use_nat_flags |= flag;
     }
     return flag;
 }
 
@@ -826,6 +837,7 @@ uint8_t flag2native(uint8_t flags)
     if(flags&X_ZF) ret|=NF_EQ;
     if(flags&X_SF) ret|=NF_SF;
     if(flags&X_OF) ret|=NF_VF;
+    if(flags&X_CF) ret|=NF_CF;
 #else
     // no native flags on rv64 or la64
 #endif
@@ -834,140 +846,110 @@ int flagIsNative(uint8_t flags)
 {
-    if(flags&(X_AF|X_PF|X_CF)) return 0;
+    if(flags&(X_AF|X_PF)) return 0;
     return 1;
 }
 
-static int markNativeFlags(dynarec_native_t* dyn, int ninst, uint8_t flags, int start)
+static uint8_t getNativeFlagsUsed(dynarec_arm_t* dyn, int start, uint8_t flags)
 {
-    while(ninst>=0) {
-//printf_log(LOG_INFO, "markNativeFlags ninst=%d, flags=%x, start=%d, nat_flags_op=%d, need_nat_flag=%x, flag_gen=%x need_before=%x need_after=%x\n", ninst, flags, start, dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].need_nat_flags, dyn->insts[ninst].x64.gen_flags, flag2native(dyn->insts[ninst].x64.need_before), flag2native(dyn->insts[ninst].x64.need_after));
-        // propagation already done
-        uint8_t flag_entry = (start && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags;
-        if((flag_entry&flags)==flags) return flag_entry;
-        // no more flag propagation
-        if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return flags;
-        // flags destroyed, cancel native flags
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return 0;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return 0;
-        if(!flagIsNative(dyn->insts[ninst].x64.use_flags)) return 0;
-        if(start) {
-            start = 0;
-            flags |= flag2native(dyn->insts[ninst].x64.need_before);
-        } else if(dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags)&flags)) {
-            // this is the emitter of the native flags! so, is it good or not?
-            if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (dyn->insts[ninst].set_nat_flags&flags)==flags) {
-                dyn->insts[ninst].need_nat_flags |= flags;
-                if(!dyn->insts[ninst].x64.may_set) // if flags just may be set, continue!
-                    return flags;
-            } else
-                return 0;
-        }
-        if(dyn->insts[ninst].use_nat_flags)
-            flags |= dyn->insts[ninst].use_nat_flags;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) // can happens on operation that read and generate flags
-            dyn->insts[ninst].before_nat_flags |= flags;
-        else
-            dyn->insts[ninst].need_nat_flags |= flags;
-        flags |= flag2native(dyn->insts[ninst].x64.need_before);
-        if(!dyn->insts[ninst].pred_sz)
+    // propagate and check wich flags are actually used
+    uint8_t used_flags = 0;
+    int ninst = start;
+    while(ninst<dyn->size) {
+//printf_log(LOG_INFO, "getNativeFlagsUsed ninst:%d/%d, flags=%x, used_flags=%x, nat_flags_op_before:%x, nat_flags_op:%x, need_after:%x\n", ninst, start, flags, used_flags, dyn->insts[ninst].nat_flags_op_before, dyn->insts[ninst].nat_flags_op, flag2native(dyn->insts[ninst].x64.need_after));
+        // check if this is an opcode that generate flags but consume flags before
+        if(dyn->insts[ninst].nat_flags_op_before)
             return 0;
-        for(int i=1; i<dyn->insts[ninst].pred_sz; ++i) {
-            int ret_flags = markNativeFlags(dyn, dyn->insts[ninst].pred[i], flags, 0);
-            if(!ret_flags)
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && dyn->insts[ninst].use_nat_flags_before)
+            used_flags|=dyn->insts[ninst].use_nat_flags_before&flags;
+        // if the opcode generate flags, return
                 return 0;
-            flags|=ret_flags;
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (start!=ninst)) {
+            if(used_flags&~dyn->insts[ninst].set_nat_flags) // check partial changes that would destroy flag state
                 return 0;
+            return used_flags;
         }
-        ninst = dyn->insts[ninst].pred[0];
-    }
-    return 0;
-}
-
-static void unmarkNativeFlags(dynarec_native_t* dyn, int ninst, int start)
-{
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, will check forward the real start\n", ninst);
-    // need to check if branch also goes forward to really start from the beggining
-    while((ninst<dyn->size) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].nat_flags_op && dyn->insts[ninst+1].before_nat_flags)
-        ninst++;
-
-    while(ninst>=0) {
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, start=%d\n", ninst, start);
-        // no more flag propagation
-        if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return;
-        // flags destroyed, but maybe it's be used
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return;
-        if(start)
-            start = 0;
-        else if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) {
-            if(!dyn->insts[ninst].x64.may_set) {
-                dyn->insts[ninst].need_nat_flags = 0;
-                dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED;
-                return;
-            }
+        // check if there is a callret barrier
+        if(dyn->insts[ninst].x64.has_callret)
+            return 0;
+        // check if flags are still needed
+        if(!(flag2native(dyn->insts[ninst].x64.need_after)&flags))
+            return used_flags;
+        // check if flags are destroyed, cancel the use then
+        if(dyn->insts[ninst].nat_flags_op && (start!=ninst))
+            return 0;
+        // check if flags are generated without native option
+        if((start!=ninst) && dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after)&used_flags)) {
+            if(used_flags&~flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after))
+                return 0;   // partial covert, not supported for now (TODO: this might be fixable)
+            else
+                return used_flags;  // full covert... End of propagation
         }
-        dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED;
-        #if 0
-        // check forward
-        if(dyn->insts[ninst].x64.has_next && dyn->insts[ninst+1].need_nat_flags)
-            unmarkNativeFlags(dyn, ninst+1, 1);
-        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) {
+        // update used flags
+        used_flags |= (flag2native(dyn->insts[ninst].x64.need_after)&flags);
+        // go next
+        if(!dyn->insts[ninst].x64.has_next) {
+            // check if it's a jump to an opcode with only 1 preds, then just follow the jump
             int jmp = dyn->insts[ninst].x64.jmp_insts;
-            if(dyn->insts[jmp].need_nat_flags)
-                unmarkNativeFlags(dyn, jmp, 1);
-        }
-        #endif
-        // check if stop
-        if(((dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags)==0)
-            return;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) // can happens on operation that read and generate flags
-            dyn->insts[ninst].before_nat_flags = 0;
-        else
-            dyn->insts[ninst].need_nat_flags = 0;
-        if(!flag2native(dyn->insts[ninst].x64.need_before)) return;
-        if(!dyn->insts[ninst].pred_sz)
-            return;
-        for(int i=1; i<dyn->insts[ninst].pred_sz; ++i)
-            unmarkNativeFlags(dyn, dyn->insts[ninst].pred[i], 0);
-        if(!dyn->insts[ninst].x64.has_next)
-            return;
-        ninst = dyn->insts[ninst].pred[0];
-    }
-}
-
-static void propagateNativeFlags(dynarec_native_t* dyn, int ninst)
-{
-    uint8_t flags = dyn->insts[ninst].use_nat_flags&flag2native(dyn->insts[ninst].x64.need_before);
-    uint8_t flags_after = flag2native(dyn->insts[ninst].x64.need_after);
-    int marked_flags = markNativeFlags(dyn, ninst, flags, 1);
-    if(!marked_flags) {
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because marked_flags is 0\n", ninst);
-        unmarkNativeFlags(dyn, ninst, 1);
-        return;
+            if(dyn->insts[ninst].x64.jmp && (jmp!=-1) && (getNominalPred(dyn, jmp)==ninst))
+                ninst = jmp;
+            else
+                return used_flags;
+        } else
+            ++ninst;
     }
-    uint8_t need_flags;
-    // check if all next have the correct flag, or if using non-native flags while native are used
-    if(dyn->insts[ninst].x64.has_next && (flags_after&marked_flags)) {
-        need_flags = dyn->insts[ninst+1].nat_flags_op?dyn->insts[ninst+1].before_nat_flags:dyn->insts[ninst+1].need_nat_flags; // native flags used
-        flags_after = flag2native(dyn->insts[ninst+1].x64.need_before)&~need_flags; // flags that are needs to be x86
-        if((need_flags&~marked_flags) || (!need_flags && (flags_after&marked_flags))) {
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because: need_flags=%hhx, flag_after=%hhx, marked_flags=%hhx\n", ninst, need_flags, flags_after, marked_flags);
-            unmarkNativeFlags(dyn, ninst, 1);
+    return used_flags;
+}
+
+static void propagateNativeFlags(dynarec_arm_t* dyn, int start)
+{
+    int ninst = start;
+    // those are the flags generated by the opcode and used later on
+    uint8_t flags = dyn->insts[ninst].set_nat_flags&flag2native(dyn->insts[ninst].x64.need_after);
+    //check if they are actualy used before starting
+//printf_log(LOG_INFO, "propagateNativeFlags called for start=%d, flags=%x, will need:%x\n", start, flags, flag2native(dyn->insts[ninst].x64.need_after));
+    if(!flags) return;
+    // also check if some native flags are used but not genereated here
+    if(flag2native(dyn->insts[ninst].x64.need_after)&~flags) return;
+    uint8_t used_flags = getNativeFlagsUsed(dyn, start, flags);
+//printf_log(LOG_INFO, " will use:%x, carry:%d, generate inverted carry:%d\n", used_flags, used_flags&NF_CF, dyn->insts[ninst].gen_inverted_carry);
+    if(!used_flags) return; // the flags wont be used, so just cancel
+    int nc = dyn->insts[ninst].gen_inverted_carry?0:1;
+    int carry = used_flags&NF_CF;
+    // propagate
+    while(ninst<dyn->size) {
+        // check if this is an opcode that generate flags but consume flags before
+        if((start!=ninst) && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) {
+            if(dyn->insts[ninst].use_nat_flags_before) {
+                dyn->insts[ninst].before_nat_flags |= used_flags;
+                if(carry) dyn->insts[ninst].normal_carry_before = nc;
+            }
+            // if the opcode generate flags, return
             return;
         }
-    }
-    #if 0
-    // check at jump point, as native flags are not converted
-    int jmp = dyn->insts[ninst].x64.jmp_insts;
-    if(dyn->insts[ninst].x64.jmp && jmp!=-1) {
-        need_flags = dyn->insts[jmp].need_nat_flags;
-        flags_after = flag2native(dyn->insts[jmp].x64.need_before);
-        if(((need_flags&flags_after)!=need_flags) || (!need_flags && (flags_after&marked_flags))) {
-            unmarkNativeFlags(dyn, ninst, 1);
+        // check if flags are generated without native option
+        if((start!=ninst) && dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after)&used_flags))
             return;
-        }
+        // mark the opcode
+        uint8_t use_flags = flag2native(dyn->insts[ninst].x64.need_before|dyn->insts[ninst].x64.need_after);
+        if(dyn->insts[ninst].x64.use_flags) use_flags |= flag2native(dyn->insts[ninst].x64.use_flags); // should not change anything
+//printf_log(LOG_INFO, " marking ninst=%d with %x | %x&%x => %x\n", ninst, dyn->insts[ninst].need_nat_flags, used_flags, use_flags, dyn->insts[ninst].need_nat_flags | (used_flags&use_flags));
+        dyn->insts[ninst].need_nat_flags |= used_flags&use_flags;
+        if(carry) dyn->insts[ninst].normal_carry = nc;
+        if(carry && dyn->insts[ninst].invert_carry) nc = 0;
+        // check if flags are still needed
+        if(!(flag2native(dyn->insts[ninst].x64.need_after)&used_flags))
+            return;
+        // go next
+        if(!dyn->insts[ninst].x64.has_next) {
+            // check if it's a jump to an opcode with only 1 preds, then just follow the jump
+            int jmp = dyn->insts[ninst].x64.jmp_insts;
+            if(dyn->insts[ninst].x64.jmp && (jmp!=-1) && (getNominalPred(dyn, jmp)==ninst))
+                ninst = jmp;
+            else
+                return;
+        } else
+            ++ninst;
     }
-    #endif
 }
 
 void updateNatveFlags(dynarec_native_t* dyn)
@@ -975,8 +957,8 @@ void updateNatveFlags(dynarec_native_t* dyn)
 {
     if(!box64_dynarec_nativeflags) return;
     // backward check if native flags are used
-    for(int ninst=dyn->size-1; ninst>=0; --ninst)
-        if(dyn->insts[ninst].use_nat_flags) {
+    for(int ninst=0; ninst<dyn->size; ++ninst)
+        if(flag2native(dyn->insts[ninst].x64.gen_flags) && (dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)) {
            propagateNativeFlags(dyn, ninst);
        }
 }
@@ -996,11 +978,19 @@ int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst)
     if(dyn->insts[ninst].set_nat_flags)
         return 0;
     uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
+    uint8_t nc_before = dyn->insts[ninst].normal_carry;
+    if(dyn->insts[ninst].invert_carry)
+        nc_before = 0;
     uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
-    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH)
+    uint8_t nc_after = dyn->insts[jmp].normal_carry;
+    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) {
         flags_after = dyn->insts[jmp].before_nat_flags;
+        nc_after = dyn->insts[jmp].normal_carry_before;
+    }
     uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
     flags_x86 &= ~flags_after;
+    if((flags_before&NF_CF) && (flags_after&NF_CF) && (nc_before!=nc_after))
+        return 1;
     // all flags_after should be present and none remaining flags_x86
     if(((flags_before&flags_after)!=flags_after) || (flags_before&flags_x86))
         return 1;
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index 446c1cb1..b17a5bf1 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -54,7 +54,7 @@ int neoncache_no_i64(dynarec_arm_t* dyn, int ninst, int st, int a);
 // transform x86 flags to native flags
 uint8_t flag2native(uint8_t flags);
 // mark a instruction as using/generating flags. return flag
-uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag);
+uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag, int before);
 // propage the use of nativeflags or not (done between step 0 and step 1)
 void updateNatveFlags(dynarec_arm_t* dyn);
 // raz arm speicifc state when an opcode is unused
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 38eacb2b..7a29562b 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -2383,7 +2383,11 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1)
         j64 = (GETMARKF2)-(dyn->native_size);
         CBZw(s1, j64);
     }
-    CALL_(UpdateFlags, -1, 0);
+    if(dyn->insts[ninst].need_nat_flags)
+        MRS_nzcv(s1);
+    CALL_(UpdateFlags, -1, s1);
+    if(dyn->insts[ninst].need_nat_flags)
+        MSR_nzcv(s1);
     MARKF2;
     }
 }
@@ -2395,12 +2399,18 @@ static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2)
     if(jmp<0)
         return;
     uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
+    uint8_t nc_before = dyn->insts[ninst].normal_carry;
+    if(dyn->insts[ninst].invert_carry)
+        nc_before = 0;
     uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
-    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH)
+    uint8_t nc_after = dyn->insts[jmp].normal_carry;
+    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) {
        flags_after = dyn->insts[jmp].before_nat_flags;
+        nc_after = dyn->insts[jmp].normal_carry_before;
+    }
     uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
     flags_x86 &= ~flags_after;
-    MESSAGE(LOG_DUMP, "\tFNative flags transform ---- ninst=%d -> %d %hhx -> %hhx/%hhx\n", ninst, jmp, flags_before, flags_after, flags_x86);
+    MESSAGE(LOG_DUMP, "\tNative Flags transform ---- ninst=%d -> %d %hhx -> %hhx/%hhx\n", ninst, jmp, flags_before, flags_after, flags_x86);
     // flags present in before and missing in after
     if((flags_before&NF_EQ) && (flags_x86&NF_EQ)) {
         CSETw(s1, cEQ);
@@ -2414,28 +2424,50 @@ static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2)
         CSETw(s1, cVS);
         BFIw(xFlags, s1, F_OF, 1);
     }
+    if((flags_before&NF_CF) && (flags_x86&NF_CF)) {
+        if(nc_before) // might need to invert carry
+            CSETw(s1, cCS);
+        else
+            CSETw(s1, cCC);
+        BFIw(xFlags, s1, F_CF, 1);
+    }
     // flags missing and needed later
     int mrs = 0;
-    #define GO_MRS(A) if(!mrs) {mrs=1; MRS_nzvc(s2); }
+    #define GO_MRS(A) if(!mrs) {mrs=1; MRS_nzcv(s2); }
     if(!(flags_before&NF_EQ) && (flags_after&NF_EQ)) {
         GO_MRS(s2);
-        BFIw(s1, xFlags, F_ZF, 1);
+        UBFXw(s1, xFlags, F_ZF, 1);
         BFIx(s2, s1, NZCV_Z, 1);
     }
     if(!(flags_before&NF_SF) && (flags_after&NF_SF)) {
         GO_MRS(s2);
-        BFIw(s1, xFlags, F_SF, 1);
+        UBFXw(s1, xFlags, F_SF, 1);
         BFIx(s2, s1, NZCV_N, 1);
     }
     if(!(flags_before&NF_VF) && (flags_after&NF_VF)) {
         GO_MRS(s2);
-        BFIw(s1, xFlags, F_OF, 1);
+        UBFXw(s1, xFlags, F_OF, 1);
         BFIx(s2, s1, NZCV_V, 1);
     }
+    if(!(flags_before&NF_CF) && (flags_after&NF_CF)) {
+        GO_MRS(s2);
+        BFIx(s2, xFlags, NZCV_C, 1); // F_CF is bit 0
+        if(!nc_after)
+            EORx_mask(s2, s2, 1, 35, 0); //mask=1<<NZCV_C
+    }
+    // special case for NF_CF changing state
+    if((flags_before&NF_CF) && (flags_after&NF_CF) && (nc_before!=nc_after)) {
+        if(arm64_flagm && !mrs) {
+            CFINV();
+        } else {
+            GO_MRS(s2);
+            EORx_mask(s2, s2, 1, 35, 0); //mask=1<<NZCV_C
+        }
+    }
     #undef GL_MRS
-    if(mrs) MSR_nzvc(s2);
+    if(mrs) MSR_nzcv(s2);
 
-    MESSAGE(LOG_DUMP, "\tF---- Native flags transform\n");
+    MESSAGE(LOG_DUMP, "\t---- Native Flags transform\n");
 }
 
 void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index dc4ad69b..dbf04ed6 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -910,6 +910,7 @@
 #ifndef IFNATIVE
 #define IFNATIVE(A) if(dyn->insts[ninst].need_nat_flags&(A))
 #define IFNATIVEN(A) if((dyn->insts[ninst].need_nat_flags&(A))==(A))
+#define IFNATIVE_BEFORE(A) if(dyn->insts[ninst].before_nat_flags&(A))
 #endif
 
 #ifndef IFX
@@ -921,6 +922,16 @@
 #define IFXN(A, B) if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B))))
 #define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.gen_flags&(X)) && (dyn->insts[ninst].need_nat_flags&(N)))
 #endif
+#ifndef INVERTED_CARRY
+#define INVERTED_CARRY !dyn->insts[ninst].normal_carry
+#define INVERTED_CARRY_BEFORE !dyn->insts[ninst].normal_carry_before
+#endif
+#ifndef GEN_INVERTED_CARRY
+#define GEN_INVERTED_CARRY()
+#endif
+#ifndef INVERT_CARRY
+#define INVERT_CARRY(A) if(dyn->insts[ninst].normal_carry) {if(arm64_flagm) CFINV(); else {MRS_nzcv(A); EORx_mask(A, A, 1, 35, 0); MSR_nzcv(A);}}
+#endif
 
 // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
 #define FCOM(s1, s2, s3) \
@@ -1711,13 +1722,29 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             break; \
         case B+0x2: \
             INST_NAME(T1 "C " T2); \
+            IFNATIVE(NF_CF) { \
+                if(INVERTED_CARRY) { \
+                    GO( , cCS, cCC, X_CF) \
+                } else { \
+                    GO( , cCC, cCS, X_CF) \
+                } \
+            } else { \
                 GO( TSTw_mask(xFlags, 0, 0) \
                     , cEQ, cNE, X_CF) \
+            } \
             break; \
         case B+0x3: \
             INST_NAME(T1 "NC " T2); \
+            IFNATIVE(NF_CF) { \
+                if(INVERTED_CARRY) { \
+                    GO( , cCC, cCS, X_CF) \
+                } else { \
+                    GO( , cCS, cCC, X_CF) \
+                } \
+            } else { \
                 GO( TSTw_mask(xFlags, 0, 0) \
                     , cNE, cEQ, X_CF) \
+            } \
             break; \
         case B+0x4: \
             INST_NAME(T1 "Z " T2); \
@@ -1739,15 +1766,25 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             break; \
         case B+0x6: \
             INST_NAME(T1 "BE " T2); \
+            IFNATIVEN(NF_EQ|NF_CF) { \
+                INVERT_CARRY(x1); \
+                GO( , cHI, cLS, X_ZF|X_CF) \
+            } else { \
                 GO( MOV32w(x1, (1<<F_CF)|(1<<F_ZF)); \
                     TSTw_REG(xFlags, x1) \
                     , cEQ, cNE, X_CF|X_ZF) \
+            } \
             break; \
         case B+0x7: \
             INST_NAME(T1 "NBE " T2); \
+            IFNATIVEN(NF_EQ|NF_CF) { \
+                INVERT_CARRY(x1); \
+                GO( , cLS, cHI, X_ZF|X_CF) \
+            } else { \
                 GO( MOV32w(x1, (1<<F_CF)|(1<<F_ZF)); \
                     TSTw_REG(xFlags, x1) \
                     , cNE, cEQ, X_CF|X_ZF) \
+            } \
             break; \
         case B+0x8: \
             INST_NAME(T1 "S " T2); \
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index d03223ed..4429aefb 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -51,12 +51,16 @@
 }
 
 #define FEMIT(A) dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].x64.set_flags?NAT_FLAG_OP_TOUCH:NAT_FLAG_OP_UNUSABLE
-#define IFNATIVE(A) if(mark_natflag(dyn, ninst, A))
-#define IFNATIVEN(A) if(mark_natflag(dyn, ninst, A))
+#define IFNATIVE(A) if(mark_natflag(dyn, ninst, A, 0))
+#define IFNATIVEN(A) if(mark_natflag(dyn, ninst, A, 0))
 #define IFX(A) if((dyn->insts[ninst].x64.set_flags&(A)))
 #define IFX2(A, B) if((dyn->insts[ninst].x64.set_flags&(A)) B)
 #define IFX_PENDOR0 if((dyn->insts[ninst].x64.set_flags&(X_PEND) || !dyn->insts[ninst].x64.set_flags))
 #define IFXX(A) if((dyn->insts[ninst].x64.set_flags==(A)))
 #define IFX2X(A, B) if((dyn->insts[ninst].x64.set_flags==(A) || dyn->insts[ninst].x64.set_flags==(B) || dyn->insts[ninst].x64.set_flags==((A)|(B))))
 #define IFXN(A, B) if((dyn->insts[ninst].x64.set_flags&(A) && !(dyn->insts[ninst].x64.set_flags&(B))))
-#define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.set_flags&(X)) && mark_natflag(dyn, ninst, N))
\ No newline at end of file
+#define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.set_flags&(X)) && mark_natflag(dyn, ninst, N, 0))
+#define GEN_INVERTED_CARRY() dyn->insts[ninst].gen_inverted_carry = 1
+#define IFNATIVE_BEFORE(A) if(mark_natflag(dyn, ninst, A, 1))
+#define INVERT_CARRY(A) dyn->insts[ninst].invert_carry = 1
+#define INVERT_CARRY_BEFORE(A) dyn->insts[ninst].invert_carry_before = 1
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 1ea9f658..b010d4ed 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -12,6 +12,7 @@ typedef struct instsize_s instsize_t;
 #define NF_EQ (1<<0)
 #define NF_SF (1<<1)
 #define NF_VF (1<<2)
+#define NF_CF (1<<3)
 
 // Nothing happens to the native flags
 #define NAT_FLAG_OP_NONE 0
@@ -109,9 +110,15 @@ typedef struct instruction_arm64_s {
     uint8_t last_write;
     uint8_t set_nat_flags;  // 0 or combinaison of native flags define
     uint8_t use_nat_flags;  // 0 or combinaison of native flags define
-    uint8_t nat_flags_op;// what happens to native flags here
+    uint8_t use_nat_flags_before;   // 0 or combinaison of native flags define
+    uint8_t nat_flags_op:4;// what happens to native flags here
+    uint8_t nat_flags_op_before:4;// what happens to native flags here
     uint8_t before_nat_flags;   // 0 or combinaison of native flags define
     uint8_t need_nat_flags;
+    unsigned gen_inverted_carry:1;
+    unsigned normal_carry:1;
+    unsigned normal_carry_before:1;
+    unsigned invert_carry:1;    // this opcode force an inverted carry
     flagcache_t f_exit;     // flags status at end of instruction
     neoncache_t n;          // neoncache at end of instruction (but before poping)
     flagcache_t f_entry;    // flags status before the instruction begin
@@ -172,11 +179,11 @@ void CreateJmpNext(void* addr, void* next);
 #define GO_TRACE(A, B, s0) \
     GETIP(addr); \
     MOVx_REG(x1, xRIP); \
-    MRS_nzvc(s0); \
+    MRS_nzcv(s0); \
     STORE_XEMU_CALL(xRIP); \
     MOV32w(x2, B); \
     CALL_(A, -1, s0); \
-    MSR_nzvc(s0); \
+    MSR_nzcv(s0); \
     LOAD_XEMU_CALL(xRIP)
 
 #endif //__DYNAREC_ARM_PRIVATE_H_
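
A closing note on the EORx_mask(s, s, 1, 35, 0) sequence that recurs in the hunks above: when FEAT_FlagM is unavailable (arm64_flagm unset, so no CFINV), the carry is flipped by reading NZCV with MRS, XOR-ing bit 29 (the C flag), and writing it back with MSR. The operands 1, 35, 0 are the N, immr, imms fields of an ARM64 logical-immediate encoding: with N=1 they select a 64-bit element containing imms+1 = 1 set bit, rotated right by immr = 35, i.e. 1<<29. A small decoder sketch for this N=1 case (hypothetical helper, not from the repo):

#include <stdint.h>
#include <stdio.h>

// Decode an ARM64 logical immediate, simplified to the N=1 (64-bit element) case.
static uint64_t logical_imm_n1(unsigned immr, unsigned imms)
{
    // imms+1 consecutive ones, then rotate right by immr within 64 bits
    uint64_t ones = (imms == 63) ? ~0ULL : ((1ULL << (imms + 1)) - 1);
    return (ones >> immr) | (ones << ((64 - immr) & 63));
}

int main(void)
{
    // EORx_mask(Rd, Rn, 1, 35, 0) => EOR with ROR64(1, 35) = 1<<29, the NZCV C bit
    printf("0x%llx\n", (unsigned long long)logical_imm_n1(35, 0)); // prints 0x20000000
    return 0;
}

This matches the //mask=1<<NZCV_C comments in the diff: the MRS/EOR/MSR fallback and CFINV both invert only the carry, leaving N, Z and V untouched.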