From 9ef3e080fade70147de0569de6eb23b89f827142 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sat, 15 Feb 2025 13:54:50 +0100 Subject: [ARM64_DYNAREC] More work on UD flags --- src/dynarec/arm64/dynarec_arm64_00.c | 18 +- src/dynarec/arm64/dynarec_arm64_0f.c | 8 +- src/dynarec/arm64/dynarec_arm64_66.c | 49 ++-- src/dynarec/arm64/dynarec_arm64_emit_shift.c | 376 ++++++++++++--------------- 4 files changed, 212 insertions(+), 239 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 1bb39f19..8ccf25ba 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -2784,7 +2784,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_OF|X_CF, SF_SUBSET); if(BOX64DRENV(dynarec_safeflags)>1) MAYSETFLAGS(); - SET_DFNONE(); UFLAG_IF { ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f CBZw_NEXT(x2); @@ -2793,10 +2792,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV32w(x4, 8); SUBx_REG(x2, x4, x2); GETEB(x1, 0); + IFX2(X_OF, && !BOX64ENV(cputype)) { + LSRw(x4, ed, 6); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + } ORRw_REG_LSL(ed, ed, ed, 8); LSRw_REG(ed, ed, x2); EBBACK; - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORxw_REG_LSR(x3, ed, ed, 7); BFIw(xFlags, x3, F_OF, 1); } @@ -2809,7 +2813,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_OF|X_CF, SF_SUBSET); if(BOX64DRENV(dynarec_safeflags)>1) MAYSETFLAGS(); - SET_DFNONE(); UFLAG_IF { ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f CBZw_NEXT(x2); @@ -2838,7 +2841,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_OF|X_CF, SF_SUBSET); if(BOX64DRENV(dynarec_safeflags)>1) MAYSETFLAGS(); - SET_DFNONE(); ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f // get CL % 9 MOV32w(x3, 0x1c72); // 0x10000 / 9 + 1 (this is precise enough in the 0..31 range) @@ -2848,6 +2850,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MSUBw(x2, x3, x4, x2); // CL mod 9 CBZw_NEXT(x2); GETEB(x1, 0); + IFX2(X_OF, && !BOX64ENV(cputype)) { + LSRw(x5, ed, 6); + EORw_REG_LSR(x5, x5, x5, 1); + BFIw(xFlags, x5, F_OF, 1); + } BFIw(ed, xFlags, 8, 1); // insert CF ORRw_REG_LSL(ed, ed, ed, 9); // insert rest of ed SUBw_REG(x2, x4, x2); @@ -2857,7 +2864,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } LSRw_REG(ed, ed, x2); EBBACK; - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORw_REG_LSR(x2, x5, ed, 7); BFIw(xFlags, x2, F_OF, 1); } @@ -2871,7 +2878,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_OF|X_CF, SF_SUBSET); if(BOX64DRENV(dynarec_safeflags)>1) MAYSETFLAGS(); - SET_DFNONE(); ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f // get CL % 9 MOV32w(x3, 0x1c72); // 0x10000 / 9 + 1 diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index ca43093f..a157a9a4 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -2322,7 +2322,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xBC: INST_NAME("BSF Gd, Ed"); - SETFLAGS(X_ZF, SF_SET_DF); + SETFLAGS(X_ZF, SF_SET); SET_DFNONE(); nextop = F8; GETED(0); @@ -2345,7 +2345,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xBD: INST_NAME("BSR Gd, Ed"); - SETFLAGS(X_ZF, SF_SET_DF); + SETFLAGS(X_ZF, SF_SET); SET_DFNONE(); nextop = F8; GETED(0); @@ -2550,7 +2550,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) switch((nextop>>3)&7) { case 6: INST_NAME("RDRAND Ed"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET); SET_DFNONE(); GETED(0); IFX(X_OF|X_SF|X_ZF|X_PF|X_AF) { @@ -2610,7 +2610,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 6: INST_NAME("RDRAND Ed"); - SETFLAGS(X_ALL, SF_SET_DF); + SETFLAGS(X_ALL, SF_SET); SET_DFNONE(); IFX(X_OF|X_SF|X_ZF|X_PF|X_AF) { MOV32w(x1, (1<>3)&7) { case 0: INST_NAME("ROL Ew, Ib"); - u8 = geted_ib(dyn, addr, ninst, nextop) & 15; + u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f; if (u8) { SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose GETEW(x1, 1); @@ -1042,7 +1042,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 1: INST_NAME("ROR Ew, Ib"); - if (geted_ib(dyn, addr, ninst, nextop) & 15) { + if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose GETEW(x1, 1); u8 = (F8)&0x1f; @@ -1055,7 +1055,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 2: INST_NAME("RCL Ew, Ib"); - if (geted_ib(dyn, addr, ninst, nextop) & 31) { + if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { READFLAGS(X_CF); SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose GETEW(x1, 1); @@ -1069,7 +1069,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 3: INST_NAME("RCR Ew, Ib"); - if (geted_ib(dyn, addr, ninst, nextop) & 31) { + if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) { READFLAGS(X_CF); SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose GETEW(x1, 1); @@ -1222,17 +1222,20 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV32w(x4, 16); SUBx_REG(x2, x4, x2); GETEW(x1, 0); + IFX2(X_OF, && !BOX64ENV(cputype)) { + LSRw(x4, ed, 14); + EORw_REG_LSR(x4, x4, x4, 1); + BFIw(xFlags, x4, F_OF, 1); + } ORRw_REG_LSL(ed, ed, ed, 16); LSRw_REG(ed, ed, x2); EWBACK; - UFLAG_IF { // calculate flags directly - SUBw_U12(x2, x2, 15); - CBNZw_MARK(x2); - EORw_REG_LSR(x3, ed, ed, 15); - BFIw(xFlags, x3, F_OF, 1); - MARK; + IFX2(X_OF, && BOX64ENV(cputype)) { + EORxw_REG_LSR(x3, ed, ed, 15); + BFIw(xFlags, x3, F_OF, 1); + } + IFX(X_CF) { BFIw(xFlags, ed, F_CF, 1); - UFLAG_DF(x2, d_none); } break; case 1: @@ -1246,18 +1249,20 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } ANDw_mask(x2, xRCX, 0, 0b00011); //mask=0x00000000f GETEW(x1, 0); + IFX2(X_OF, && !BOX64ENV(cputype)) { + EORw_REG_LSR(x4, ed, ed, 15); + BFIw(xFlags, x4, F_OF, 1); + } ORRw_REG_LSL(ed, ed, ed, 16); LSRw_REG(ed, ed, x2); EWBACK; - UFLAG_IF { // calculate flags directly - SUBw_U12(x2, x2, 1); - CBNZw_MARK(x2); - LSRxw(x2, ed, 14); // x2 = d>>14 - EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>14) ^ ((d>>14)>>1)) - BFIw(xFlags, x2, F_OF, 1); - MARK; + IFX2(X_OF, && BOX64ENV(cputype)) { + LSRxw(x2, ed, 14); // x2 = d>>6 + EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>14) ^ ((d>>14)>>1)) + BFIw(xFlags, x2, F_OF, 1); + } + IFX(X_CF) { BFXILw(xFlags, ed, 15, 1); - UFLAG_DF(x2, d_none); } break; case 2: @@ -1447,7 +1452,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFXILx(xRDX, x1, 16, 16); SET_DFNONE(); IFX(X_CF|X_OF) { - ASRw(x2, x1, 16); + ASRw(x2, x1, 15); CMPSw_REG_ASR(x2, x1, 31); CSETw(x3, cNE); IFX(X_CF) { diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c index 6637ca51..7a290e54 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c @@ -59,19 +59,17 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } - if(BOX64ENV(cputype)) - IFX(X_OF) { - LSRxw(s4, s1, (rex.w)?63:31); - EORxw_REG(s3, s4, xFlags); // CF is set if OF is asked - BFIw(xFlags, s3, F_OF, 1); - } + IFX2(X_OF, && BOX64ENV(cputype)) { + EORxw_REG_LSR(s3, xFlags, s1, rex.w?63:31); // CF is set if OF is asked + BFIw(xFlags, s3, F_OF, 1); + } int need_tst = 0; IFX(X_ZF) need_tst = 1; IFXNATIVE(X_SF, NF_SF) need_tst = 1; if(need_tst) TSTxw_REG(s1, s1); IFX(X_SF) { IFNATIVE(NF_SF) {} else { - IFX2(X_OF, && BOX64ENV(cputype)) {} else {LSRxw(s4, s1, (rex.w)?63:31);} + LSRxw(s4, s1, (rex.w)?63:31); BFIw(xFlags, s4, F_SF, 1); } } @@ -427,69 +425,38 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s } else { SET_DFNONE(); } - if(c<8) { - if(BOX64ENV(cputype)) { - IFX(X_CF|X_OF) { - BFXILw(xFlags, s1, 8-c, 1); - } - } else { - IFX(X_OF) { - LSRw(s4, s1, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); - } - IFX(X_CF) { - BFXILw(xFlags, s1, 8-c, 1); - } - } - LSLw(s1, s1, c); - - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - COMP_ZFSF(s1, 8) - if(BOX64ENV(cputype)) - IFX(X_OF) { - IFX2(X_SF, && !arm64_flagm) {} else {LSRw(s3, s1, 7);} //use COMP_ZFSF operation - EORw_REG(s4, s3, xFlags); // CF is set if OF is asked - BFIw(xFlags, s4, F_OF, 1); - } - IFX (X_AF) { - BFCw(xFlags, F_AF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s4); + if(BOX64ENV(cputype)) { + IFX(X_CF|X_OF) { + BFXILw(xFlags, s1, 8-(c&7), 1); } } else { - IFXNATIVE(X_ZF|X_SF|X_CF, NF_EQ|NF_SF|NF_CF) { - SUBSw_REG(s1, s1, s1); - } else { - MOVw_REG(s1, xZR); - } - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + IFX(X_OF) { + LSRw(s4, s1, 6); + EORw_REG_LSR(s4, s4, s4, 1); + BFIw(xFlags, s4, F_OF, 1); } + } + LSLw(s1, s1, c); + if(!BOX64ENV(cputype)) IFX(X_CF) { - // sub X, X will generate a carry flags - IFNATIVE(NF_CF) {GEN_INVERTED_CARRY();} else {BFCw(xFlags, F_CF, 1);} + BFXILw(xFlags, s1, 8, 1); } + + IFX(X_PEND) { + STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + COMP_ZFSF(s1, 8) + if(BOX64ENV(cputype)) IFX(X_OF) { - IFNATIVE(NF_VF) {} else BFCw(xFlags, F_OF, 1); - } - IFX(X_SF) { - IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1); - } - IFX (X_AF) { - BFCw(xFlags, F_AF, 1); - } - IFX(X_ZF) { - IFNATIVE(NF_EQ) {} else { - ORRw_mask(xFlags, xFlags, 26, 0); //1<7) { - // the 0xff area will be 0, so PF is known - ORRw_mask(xFlags, xFlags, 30, 0); //1<7) { + // the 0xff area will be 0, so PF is known ORRw_mask(xFlags, xFlags, 30, 0); //1<1)) { - EORx_REG_LSR(s3, s4, s1, rex.w?63:31); // OF is set if sign changed - BFIw(xFlags, s3, F_OF, 1); - } + IFX2(X_OF, && BOX64ENV(cputype)) { + EORx_REG_LSR(s3, s4, s1, rex.w?63:31); // OF is set if sign changed + BFIw(xFlags, s3, F_OF, 1); } int need_tst = 0; IFX(X_ZF) need_tst = 1; @@ -1334,7 +1275,11 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s LSRxw_REG(s3, s1, s3); BFIw(xFlags, s3, F_CF, 1); } - IFX(X_OF) { + IFX2(X_OF, && !BOX64ENV(cputype)) { + EORx_REG_LSR(s3, s2, s1, rex.w?63:31); // OF is set if sign changed + BFIw(xFlags, s3, F_OF, 1); + } + IFX2(X_OF, && BOX64ENV(cputype)) { LSRxw(s4, s1, rex.w?63:31); } if(s1==s2) { @@ -1349,7 +1294,7 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORxw_REG_LSR(s3, s4, s1, rex.w?63:31); // OF is set if sign changed BFIw(xFlags, s3, F_OF, 1); } @@ -1390,9 +1335,14 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s LSRxw_REG(s4, s1, s3); BFIxw(xFlags, s4, F_CF, 1); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { LSRxw(s4, s1, rex.w?63:31); } + IFX2(X_OF, && !BOX64ENV(cputype)) { + LSRxw(s4, s1, rex.w?62:30); + EORx_REG_LSR(s4, s4, s4, 1); // OF is set if sign changed + BFIw(xFlags, s4, F_OF, 1); + } if(s1==s2) { RORxw_REG(s1, s1, s3); } else { @@ -1404,7 +1354,7 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORx_REG_LSR(s3, s4, s1, rex.w?63:31); // OF is set if sign changed BFIw(xFlags, s3, F_OF, 1); } @@ -1442,28 +1392,26 @@ void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int } else { SET_DFNONE(); } - if(!c) { - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - return; - } ORRw_REG_LSL(s1, s1, s2, 16); // create concat first IFX(X_CF) { BFXILw(xFlags, s1, c-1, 1); // set CF } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { LSRw(s4, s1, 15); } + IFX2(X_OF, && !BOX64ENV(cputype)) { + EORw_REG_LSR(s4, s2, s1, 15); + BFIw(xFlags, s4, F_OF, 1); + } RORw(s1, s1, c); IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } - COMP_ZFSF(s1, 16) - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORx_REG_LSR(s3, s4, s1, 15); // OF is set if sign changed BFIw(xFlags, s3, F_OF, 1); } + COMP_ZFSF(s1, 16) IFX(X_AF) {BFCw(xFlags, F_AF, 1);} IFX(X_PF) { emit_pf(dyn, ninst, s1, s4); @@ -1487,14 +1435,18 @@ void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, LSRw_REG(s3, s1, s3); BFIw(xFlags, s3, F_CF, 1); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { LSRw(s4, s1, 15); } + IFX2(X_OF, && !BOX64ENV(cputype)) { + EORw_REG_LSR(s4, s2, s1, 15); + BFIw(xFlags, s4, F_OF, 1); + } RORw_REG(s1, s1, s5); IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORw_REG_LSR(s3, s4, s1, 15); // OF is set if sign changed BFIw(xFlags, s3, F_OF, 1); } @@ -1517,16 +1469,21 @@ void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int SET_DFNONE(); } BFIw(s1, s2, 16, 16); // create concat first - IFX2(X_CF, && c) { - if(c<16) + IFX(X_CF) { + if(c<=16) LSRw(s3, s1, 16-c); else LSRw(s3, s2, 32-c); BFIw(xFlags, s3, F_CF, 1); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { LSRw(s4, s1, 15); } + IFX2(X_OF, && !BOX64ENV(cputype)) { + LSRw(s4, s1, 14); + EORw_REG_LSR(s4, s4, s4, 1); // OF is set if sign changed + BFIw(xFlags, s4, F_OF, 1); + } if(c) RORw(s1, s1, 32-c); @@ -1534,7 +1491,7 @@ void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } COMP_ZFSF(s1, 16) - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORw_REG_LSR(s3, s4, s1, 15); // OF is set if sign changed BFIw(xFlags, s3, F_OF, 1); } @@ -1560,9 +1517,14 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, LSRw_REG(s3, s4, s3); BFIw(xFlags, s3, F_CF, 1); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { LSRw(s4, s1, 15); } + IFX2(X_OF, && !BOX64ENV(cputype)) { + LSRw(s4, s1, 14); + EORw_REG_LSR(s4, s4, s4, 1); // OF is set if sign changed + BFIw(xFlags, s4, F_OF, 1); + } BFIw(s1, s2, 16, 16); // create concat first MOV32w(s3, 32); SUBw_REG(s3, s3, s5); @@ -1571,9 +1533,9 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_OF) { + IFX2(X_OF, && BOX64ENV(cputype)) { EORw_REG_LSR(s3, s4, s1, 15); // OF is set if sign changed - BFIw(xFlags, s3, F_OF, 1); + BFIw(xFlags, s3, F_OF, 1); } COMP_ZFSF(s1, 16) IFX(X_AF) {BFCw(xFlags, F_AF, 1);} -- cgit 1.4.1