From fc7d204d4bca211f2462a0534e77f0929f121330 Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Sun, 20 Oct 2024 21:47:21 +0200
Subject: [ARM64_DYNAREC] More work around native flags handling

---
 src/dynarec/arm64/dynarec_arm64_00.c         |  11 +--
 src/dynarec/arm64/dynarec_arm64_0f.c         |  59 ++++++++-------
 src/dynarec/arm64/dynarec_arm64_64.c         |  60 +++++++--------
 src/dynarec/arm64/dynarec_arm64_66.c         |  38 ++++------
 src/dynarec/arm64/dynarec_arm64_660f.c       |  43 +++++++----
 src/dynarec/arm64/dynarec_arm64_67.c         |   8 +-
 src/dynarec/arm64/dynarec_arm64_67_32.c      |   8 +-
 src/dynarec/arm64/dynarec_arm64_emit_shift.c |  53 +++++++++++++-
 src/dynarec/arm64/dynarec_arm64_f0.c         |  22 +++---
 src/dynarec/arm64/dynarec_arm64_helper.h     | 105 ++++++++++++++-----------
 10 files changed, 242 insertions(+), 165 deletions(-)

diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 53dec770..5a55e015 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -2753,13 +2753,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     UFLAG_IF {
                         UFLAG_DF(x2, d_none);
                     }
-                    MOV64xw(x4, (rex.w?64:32));
-                    SUBx_REG(x3, x4, x3);
                     GETED(0);
                     UFLAG_IF {
                         if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                         CBZw_NEXT(x3);
                     }
+                    MOV64xw(x4, (rex.w?64:32));
+                    SUBx_REG(x3, x4, x3);
                     RORxw_REG(ed, ed, x3);
                     WBACK;
                     UFLAG_IF { // calculate flags directly
@@ -2884,7 +2884,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 7:
                     INST_NAME("SAR Ed, CL");
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
                     if(box64_dynarec_safeflags>1)
                         MAYSETFLAGS();
                     if(rex.w) {
@@ -2897,11 +2897,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                         CBZw_NEXT(x3);
                     }
-                    UFLAG_OP12(ed, x3);
-                    ASRxw_REG(ed, ed, x3);
+                    emit_sar32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
-                    UFLAG_RES(ed);
-                    UFLAG_DF(x3, rex.w?d_sar64:d_sar32);
                     break;
             }
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 79c0c916..3969a0cf 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -69,8 +69,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch(nextop) {
                 case 0xD0:
                     INST_NAME("XGETBV");
-                    CMPSw_REG(xRCX, xZR);
-                    B_MARK(cEQ);
+                    CBZw_MARK(xRCX);
                     UDF(0);
                     MARK;
                     MOV32w(xRAX, 0b111);
@@ -154,8 +153,8 @@
            CALL_S(x64Syscall, -1);
            LOAD_XEMU_CALL(xRIP);
            TABLE64(x3, addr); // expected return address
-           CMPSx_REG(xRIP, x3);
-           B_MARK(cNE);
+           SUBx_REG(x3, x3, xRIP);
+           CBNZx_MARK(x3);
            LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
            CBZw_NEXT(w1);
            MARK;
@@ -1679,11 +1678,11 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            GETED(0);
            if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
            if(rex.w) {
-               ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+               ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
            } else {
-               ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+               ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
            }
-           B_NEXT(cEQ);
+           CBZw_NEXT(x3);
            emit_shld32(dyn, ninst, rex, ed, gd, x3, x5, x4);
            WBACK;
            break;
@@ -1760,11 +1759,11 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            GETED(0);
            if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
            if(rex.w) {
-               ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+               ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
            } else {
-               ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+               ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
            }
-           B_NEXT(cEQ);
+           CBZw_NEXT(x3);
            emit_shrd32(dyn, ninst, rex, ed, gd, x3, x5, x4);
            WBACK;
            break;
@@ -1936,20 +1935,16 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                wback = 0;
                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);}
                MOVxw_REG(x1, ed); // save value
-               CMPSxw_REG(xRAX, x1);
-               if(rex.w) {
-                   CSELxw(ed, gd, ed, cEQ);
-               } else {
-                   B_MARK2(cNE);
-                   MOVw_REG(ed, gd);
-                   MARK2;
-               }
+               SUBxw_REG(x4, xRAX, x1);
+               CBNZxw_MARK2(x4);
+               MOVxw_REG(ed, gd);
+               MARK2;
            } else {
                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);
                LDxw(x1, wback, fixedaddress);
                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
-               CMPSxw_REG(xRAX, x1);
-               B_MARK(cNE);
+               SUBxw_REG(x4, xRAX, x1);
+               CBNZxw_MARK(x4);
                // EAX == Ed
                STxw(gd, wback, fixedaddress);
                MARK;
@@ -2167,8 +2162,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            nextop = F8;
            GETED(0);
            GETGD;
-           TSTxw_REG(ed, ed);
-           B_MARK(cEQ);
+           IFX(X_ZF) {
+               TSTxw_REG(ed, ed);
+               B_MARK(cEQ);
+           } else {
+               CBZxw_MARK(ed);
+           }
            RBITxw(x1, ed); // reverse
            CLZxw(gd, x1); // x2 gets leading 0 == BSF
            MARK;
@@ -2186,8 +2185,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            nextop = F8;
            GETED(0);
            GETGD;
-           TSTxw_REG(ed, ed);
-           B_MARK(cEQ);
+           IFX(X_ZF) {
+               TSTxw_REG(ed, ed);
+               B_MARK(cEQ);
+           } else {
+               CBZxw_MARK(ed);
+           }
            CLZxw(gd, ed); // x2 gets leading 0
            SUBxw_U12(gd, gd, rex.w?63:31);
            NEGxw_REG(gd, gd); // complement
@@ -2411,18 +2414,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            B_MARK(cNE); // EAX!=ED[0] || EDX!=Ed[1]
            STPxw_S7_offset(xRBX, xRCX, wback, 0);
            UFLAG_IF {
-               MOV32w(x1, 1);
+               IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
            }
            B_MARK3_nocond;
            MARK;
            MOVxw_REG(xRAX, x2);
            MOVxw_REG(xRDX, x3);
            UFLAG_IF {
-               MOV32w(x1, 0);
+               IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
            }
            MARK3;
            UFLAG_IF {
-               BFIw(xFlags, x1, F_ZF, 1);
+               IFNATIVE(NF_EQ) {} else {
+                   BFIw(xFlags, x1, F_ZF, 1);
+               }
            }
            SMWRITE();
            break;
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 956ee565..de3a4041 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -1015,22 +1015,21 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    INST_NAME("ROL Ed, CL");
                    SETFLAGS(X_OF|X_CF, SF_SUBSET);
                    if(rex.w) {
-                       ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    MOV64xw(x4, (rex.w?64:32));
                    SUBx_REG(x3, x4, x3);
                    GETEDO(x6, 0);
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
+                   CBZw_NEXT(x3);
                    RORxw_REG(ed, ed, x3);
                    WBACKO(x6);
                    UFLAG_IF { // calculate flags directly
-                       CMPSw_U12(x3, rex.w?63:31);
-                       B_MARK(cNE);
-                       LSRxw(x4, ed, rex.w?63:31);
-                       ADDxw_REG(x4, x4, ed);
+                       SUBw_U12(x4, x3, rex.w?63:31);
+                       CBNZw_MARK(x4);
+                       EORw_REG_LSR(x4, ed, ed, rex.w?63:31);
                        BFIw(xFlags, x4, F_OF, 1);
                        MARK;
                        BFIw(xFlags, ed, F_CF, 1);
@@ -1041,18 +1040,18 @@
                    INST_NAME("ROR Ed, CL");
                    SETFLAGS(X_OF|X_CF, SF_SUBSET);
                    if(rex.w) {
-                       ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETEDO(x6, 0);
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
+                   CBZw_NEXT(x3);
                    RORxw_REG(ed, ed, x3);
                    WBACKO(x6);
                    UFLAG_IF { // calculate flags directly
-                       CMPSw_U12(x3, 1);
-                       B_MARK(cNE);
+                       SUBw_U12(x2, x3, 1);
+                       CBNZw_MARK(x2);
                        LSRxw(x2, ed, rex.w?62:30); // x2 = d>>30
                        EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1))
                        BFIw(xFlags, x2, F_OF, 1);
@@ -1068,14 +1067,14 @@
                    READFLAGS(X_CF);
                    SETFLAGS(X_OF|X_CF, SF_SET_DF);
                    if(rex.w) {
-                       ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETEDO(x6, 0);
                    if(wback) {ADDx_REG(x6, x6, wback); wback=x6;}
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
+                   CBZw_NEXT(x2);
                    CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x6);
                    WBACK;
                    break;
@@ -1085,14 +1084,14 @@
                    READFLAGS(X_CF);
                    SETFLAGS(X_OF|X_CF, SF_SET_DF);
                    if(rex.w) {
-                       ANDSx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x2, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETEDO(x6, 0);
                    if(wback) {ADDx_REG(x6, x6, wback); wback=x6;}
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
+                   CBZw_NEXT(x2);
                    CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x6);
                    WBACK;
                    break;
@@ -1101,13 +1100,13 @@
                    INST_NAME("SHL Ed, CL");
                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
                    if(rex.w) {
-                       ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETEDO(x6, 0);
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
+                   CBZw_NEXT(x3);
                    emit_shl32(dyn, ninst, rex, ed, x3, x5, x4);
                    WBACKO(x6);
                    break;
@@ -1115,32 +1114,29 @@
                    INST_NAME("SHR Ed, CL");
                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
                    if(rex.w) {
-                       ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETEDO(x6, 0);
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
+                   CBZw_NEXT(x3);
                    emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                    WBACKO(x6);
                    break;
                case 7:
                    INST_NAME("SAR Ed, CL");
-                   SETFLAGS(X_ALL, SF_PENDING);
+                   SETFLAGS(X_ALL, SF_SET_PENDING);
                    if(rex.w) {
-                       ANDSx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
+                       ANDx_mask(x3, xRCX, 1, 0, 0b00101); //mask=0x000000000000003f
                    } else {
-                       ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
+                       ANDw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETEDO(x6, 0);
                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
-                   B_NEXT(cEQ);
-                   UFLAG_OP12(ed, x3);
-                   ASRxw_REG(ed, ed, x3);
+                   CBZw_NEXT(x3);
+                   emit_sar32(dyn, ninst, rex, ed, x3, x5, x4);
                    WBACKO(x6);
-                   UFLAG_RES(ed);
-                   UFLAG_DF(x3, rex.w?d_sar64:d_sar32);
                    break;
            }
            break;
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index aa1333e9..8a270619 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -581,8 +581,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            GETGD;
            addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
            if(!ALIGNED_ATOMICH) {
-               TSTx_mask(ed, 1, 0, 0); // mask=1
-               B_MARK(cNE);
+               TBNZ_MARK(ed, 0);
            }
            if(arm64_atomics) {
                SWPALH(gd, x1, ed);
@@ -1161,8 +1160,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    if(box64_dynarec_safeflags>1)
                        MAYSETFLAGS();
                    UFLAG_IF {
-                       TSTw_mask(xRCX, 0, 0b00100); //mask=0x00000001f
-                       B_NEXT(cEQ);
+                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
+                       CBZw_NEXT(x2);
                    }
                    ANDw_mask(x2, xRCX, 0, 0b00011); //mask=0x00000000f
                    MOV32w(x4, 16);
@@ -1172,9 +1171,9 @@
                    LSRw_REG(ed, ed, x2);
                    EWBACK;
                    UFLAG_IF { // calculate flags directly
-                       CMPSw_U12(x2, 15);
-                       B_MARK(cNE);
-                       ADDxw_REG_LSR(x3, ed, ed, 15);
+                       SUBw_U12(x2, x2, 15);
+                       CBNZw_MARK(x2);
+                       EORw_REG_LSR(x3, ed, ed, 15);
                        BFIw(xFlags, x3, F_OF, 1);
                        MARK;
                        BFIw(xFlags, ed, F_CF, 1);
@@ -1187,8 +1186,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    if(box64_dynarec_safeflags>1)
                        MAYSETFLAGS();
                    UFLAG_IF {
-                       TSTw_mask(xRCX, 0, 0b00100); //mask=0x00000001f
-                       B_NEXT(cEQ);
+                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
+                       CBZw_NEXT(x2);
                    }
                    ANDw_mask(x2, xRCX, 0, 0b00011); //mask=0x00000000f
                    GETEW(x1, 0);
@@ -1196,8 +1195,8 @@
                    LSRw_REG(ed, ed, x2);
                    EWBACK;
                    UFLAG_IF { // calculate flags directly
-                       CMPSw_U12(x2, 1);
-                       B_MARK(cNE);
+                       SUBw_U12(x2, x2, 1);
+                       CBNZw_MARK(x2);
                        LSRxw(x2, ed, 14); // x2 = d>>14
                        EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>14) ^ ((d>>14)>>1))
                        BFIw(xFlags, x2, F_OF, 1);
@@ -1236,11 +1235,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
                    if(box64_dynarec_safeflags>1)
                        MAYSETFLAGS();
+                   ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
                    UFLAG_IF {
-                       ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
-                       B_NEXT(cEQ);
-                   } else {
-                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
+                       CBZw_NEXT(x2);
                    }
                    GETEW(x1, 0);
                    emit_shl16(dyn, ninst, x1, x2, x5, x4);
@@ -1251,11 +1248,9 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
                    if(box64_dynarec_safeflags>1)
                        MAYSETFLAGS();
+                   ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
                    UFLAG_IF {
-                       ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
-                       B_NEXT(cEQ);
-                   } else {
-                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
+                       CBZw_NEXT(x2);
                    }
                    GETEW(x1, 0);
                    emit_shr16(dyn, ninst, x1, x2, x5, x4);
@@ -1266,11 +1261,10 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
                    if(box64_dynarec_safeflags>1)
                        MAYSETFLAGS();
+                   ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
                    UFLAG_IF {
-                       ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
-                       B_NEXT(cEQ);
+                       CBZw_NEXT(x2);
                    } else {
-                       ANDw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f
                    }
                    GETSEW(x1, 0);
                    emit_sar16(dyn, ninst, x1, x2, x5, x4);
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 8a385ca4..2ddb2212 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2344,11 +2344,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
            MAYSETFLAGS();
            GETGW(x2);
            GETEW(x1, 0);
+           ANDw_mask(x4, xRCX, 0, 0b00100); //mask=0x00000001f
            UFLAG_IF {
-               ANDSw_mask(x4, xRCX, 0, 0b00100); //mask=0x00000001f
-               B_NEXT(cEQ);
-           } else {
-               ANDw_mask(x4, xRCX, 0, 0b00100); //mask=0x00000001f
+               CBZw_NEXT(x4);
            }
            emit_shld16(dyn, ninst, ed, gd, x4, x5, x6);
            EWBACK;
@@ -2401,11 +2399,10 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
            MAYSETFLAGS();
            GETGW(x2);
            GETEW(x1, 0);
+           ANDw_mask(x4, xRCX, 0, 0b00100); //mask=0x00000001f
            UFLAG_IF {
-               ANDSw_mask(x4, xRCX, 0, 0b00100); //mask=0x00000001f
-               B_NEXT(cEQ);
+               CBZw_NEXT(x4);
            } else {
-               ANDw_mask(x4, xRCX, 0, 0b00100); //mask=0x00000001f
            }
            emit_shrd16(dyn, ninst, ed, gd, x4, x5, x6);
            EWBACK;
@@ -2586,14 +2583,22 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
            nextop = F8;
            GETGD;
            GETEW(x1, 0); // Get EW
-           TSTw_REG(x1, x1);
-           B_MARK(cEQ);
+           IFX(X_ZF) {
+               TSTw_REG(x1, x1);
+               B_MARK(cEQ);
+           } else {
+               CBZw_MARK(x1);
+           }
            RBITw(x1, x1); // reverse
            CLZw(x2, x1); // x2 gets leading 0 == BSF
            BFIx(gd, x2, 0, 16);
            MARK;
-           CSETw(x1, cEQ); //ZF not set
-           BFIw(xFlags, x1, F_ZF, 1);
+           IFX(X_ZF) {
+               IFNATIVE(NF_EQ) {} else {
+                   CSETw(x1, cEQ); //ZF not set
+                   BFIw(xFlags, x1, F_ZF, 1);
+               }
+           }
            break;
        case 0xBD:
            INST_NAME("BSR Gw,Ew");
@@ -2602,16 +2607,24 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
            nextop = F8;
            GETGD;
            GETEW(x1, 0); // Get EW
-           TSTw_REG(x1, x1); // Don't use CBZ here, as the flag is reused later
-           B_MARK(cEQ);
+           IFX(X_ZF) {
+               TSTw_REG(x1, x1); // Don't use CBZ here, as the flag is reused later
+               B_MARK(cEQ);
+           } else {
+               CBZw_MARK(x1);
+           }
            LSLw(x1, x1, 16); // put bits on top
            CLZw(x2, x1); // x2 gets leading 0
            SUBw_U12(x2, x2, 15);
            NEGw_REG(x2, x2); // complement
            BFIx(gd, x2, 0, 16);
            MARK;
-           CSETw(x1, cEQ); //ZF not set
-           BFIw(xFlags, x1, F_ZF, 1);
+           IFX(X_ZF) {
+               IFNATIVE(NF_EQ) {} else {
+                   CSETw(x1, cEQ); //ZF not set
+                   BFIw(xFlags, x1, F_ZF, 1);
+               }
+           }
            break;
        case 0xBE:
            INST_NAME("MOVSX Gw, Eb");
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 111bcf8e..94202ba1 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -1321,9 +1321,9 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            READFLAGS(X_ZF);
            i8 = F8S;
            MOVw_REG(x1, xRCX);
-           SUBSw_U12(x1, x1, 1);
+           SUBw_U12(x1, x1, 1);
            BFIx(xRCX, x1, 0, 32);
-           B_NEXT(cEQ); // ECX is 0, no LOOP
+           CBZw_NEXT(x1); // ECX is 0, no LOOP
            TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
            GO(cNE, cEQ);
            break;
@@ -1332,9 +1332,9 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            READFLAGS(X_ZF);
            i8 = F8S;
            MOVw_REG(x1, xRCX);
-           SUBSw_U12(x1, x1, 1);
+           SUBw_U12(x1, x1, 1);
            BFIx(xRCX, x1, 0, 32);
-           B_NEXT(cEQ); // ECX is 0, no LOOP
+           CBZw_NEXT(x1); // ECX is 0, no LOOP
            TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
            GO(cEQ, cNE);
            break;
diff --git a/src/dynarec/arm64/dynarec_arm64_67_32.c b/src/dynarec/arm64/dynarec_arm64_67_32.c
index 691c470c..61556716 100644
--- a/src/dynarec/arm64/dynarec_arm64_67_32.c
+++ b/src/dynarec/arm64/dynarec_arm64_67_32.c
@@ -92,9 +92,9 @@ uintptr_t dynarec64_67_32(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
            READFLAGS(X_ZF);
            i8 = F8S;
            UXTHw(x1, xRCX);
-           SUBSw_U12(x1, x1, 1);
+           SUBw_U12(x1, x1, 1);
            BFIx(xRCX, x1, 0, 16);
-           B_NEXT(cEQ); // ECX is 0, no LOOP
+           CBZw_NEXT(x1); // ECX is 0, no LOOP
            TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
            GO(cNE, cEQ);
            break;
@@ -103,9 +103,9 @@ uintptr_t dynarec64_67_32(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
            READFLAGS(X_ZF);
            i8 = F8S;
            UXTHw(x1, xRCX);
-           SUBSw_U12(x1, x1, 1);
+           SUBw_U12(x1, x1, 1);
            BFIx(xRCX, x1, 0, 16);
-           B_NEXT(cEQ); // ECX is 0, no LOOP
+           CBZw_NEXT(x1); // ECX is 0, no LOOP
            TSTw_mask(xFlags, 0b011010, 0); //mask=0x40
            GO(cEQ, cNE);
            break;
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index c87a43ab..c2d74576 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -266,6 +266,57 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
     }
 }
 
+// emit SAR32 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s2 can be same as s3
+void emit_sar32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    MAYUSE(s2);
+    int64_t j64;
+    MAYUSE(j64);
+
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_sar64:d_sar32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(X_CF) {
+        SUBxw_U12(s3, s2, 1);
+        ASRxw_REG(s3, s1, s3);
+        BFIw(xFlags, s3, 0, 1);
+    }
+    IFX(X_OF) {
+        BFCw(xFlags, F_OF, 1);
+    }
+    ASRxw_REG(s1, s1, s2);
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
+    IFX(X_ZF) {
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
+    }
+    IFX(X_SF) {
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
+    }
+    if(box64_dynarec_test)
+        IFX(X_AF) {
+            BFCw(xFlags, F_AF, 1);
+        }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s4);
+    }
+}
+
 // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
@@ -304,7 +355,7 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         }
     }
     IFX(X_OF)
-        if(c==1) {
+        if(c==1 || box64_dynarec_test) {
            BFCw(xFlags, F_OF, 1);
        }
    if(box64_dynarec_test)
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index c6ded5d0..4767f36a 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -96,7 +96,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            SETFLAGS(X_ALL, SF_SET_PENDING);
            nextop = F8;
            GETGD;
-           if((nextop&0xC0)==0xC0) {
+           if(MODREG) {
                ed = xRAX+(nextop&7)+(rex.b<<3);
                emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
            } else {
@@ -271,8 +271,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                wb1 = 0;
                ed = x2;
                UFLAG_IF {emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5);}
-               CMPSxw_REG(x6, x2);
-               B_MARK2(cNE);
+               SUBxw_REG(x6, x6, x2);
+               CBNZxw_MARK2(x6);
                BFIx(wback, x2, wb2, 8);
                MOVxw_REG(ed, gd);
                MARK2;
@@ -321,8 +321,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                wback = 0;
                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);}
                MOVxw_REG(x1, ed); // save value
-               CMPSxw_REG(xRAX, x1);
-               B_MARK2(cNE);
+               SUBxw_REG(x3, xRAX, x1);
+               CBNZxw_MARK2(x3);
                MOVxw_REG(ed, gd);
                MARK2;
                MOVxw_REG(xRAX, x1);
@@ -719,7 +719,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                UFLAG_IF {
                    CMPSxw_REG(x2, xRAX);
                    CCMPxw(x3, xRDX, 0, cEQ);
-                   CSETw(x1, cEQ);
+                   IFNATIVE(NF_EQ) {} else {CSETw(x1, cEQ);}
                }
                MOVx_REG(xRAX, x2);
                MOVx_REG(xRDX, x3);
@@ -735,7 +735,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                STLXPxw(x4, xRBX, xRCX, wback);
                CBNZx_MARKLOCK(x4);
                UFLAG_IF {
-                   MOV32w(x1, 1);
+                   IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
                }
                B_MARK3_nocond;
                MARK;
@@ -744,7 +744,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                MOVxw_REG(xRAX, x2);
                MOVxw_REG(xRDX, x3);
                UFLAG_IF {
-                   MOV32w(x1, 0);
+                   IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
                }
                if(!ALIGNED_ATOMICxw) {
                    B_MARK3_nocond;
@@ -761,7 +761,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                CBNZx_MARK2(x4);
                STPxw_S7_offset(xRBX, xRCX, wback, 0);
                UFLAG_IF {
-                   MOV32w(x1, 1);
+                   IFNATIVE(NF_EQ) {} else {MOV32w(x1, 1);}
                }
                B_MARK3_nocond;
                MARKSEG;
@@ -770,13 +770,13 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                MOVxw_REG(xRAX, x2);
                MOVxw_REG(xRDX, x3);
                UFLAG_IF {
-                   MOV32w(x1, 0);
+                   IFNATIVE(NF_EQ) {} else {MOV32w(x1, 0);}
                }
            }
            MARK3;
            SMDMB();
            UFLAG_IF {
-               BFIw(xFlags, x1, F_ZF, 1);
+               IFNATIVE(NF_EQ) {} else {BFIw(xFlags, x1, F_ZF, 1);}
            }
            break;
        default:
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 47c3b806..dc4ad69b 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -752,88 +752,99 @@
 #define GETMARKLOCK dyn->insts[ninst].marklock
 // Branch to MARK if cond (use j64)
-#define B_MARK(cond) \
-    j64 = GETMARK-(dyn->native_size); \
+#define B_MARK(cond)                        \
+    j64 = GETMARK-(dyn->native_size);       \
     Bcond(cond, j64)
 // Branch to MARK unconditionnal (use j64)
-#define B_MARK_nocond \
-    j64 = GETMARK-(dyn->native_size); \
+#define B_MARK_nocond                       \
+    j64 = GETMARK-(dyn->native_size);       \
     B(j64)
 // Branch to MARK if reg is 0 (use j64)
-#define CBZxw_MARK(reg) \
-    j64 = GETMARK-(dyn->native_size); \
+#define CBZw_MARK(reg)                      \
+    j64 = GETMARK-(dyn->native_size);       \
+    CBZw(reg, j64)
+// Branch to MARK if reg is 0 (use j64)
+#define CBZxw_MARK(reg)                     \
+    j64 = GETMARK-(dyn->native_size);       \
     CBZxw(reg, j64)
 // Branch to MARK if reg is not 0 (use j64)
-#define CBNZx_MARK(reg) \
-    j64 = GETMARK-(dyn->native_size); \
+#define CBNZx_MARK(reg)                     \
+    j64 = GETMARK-(dyn->native_size);       \
     CBNZx(reg, j64)
 // Branch to MARK if reg is not 0 (use j64)
-#define CBNZw_MARK(reg) \
-    j64 = GETMARK-(dyn->native_size); \
+#define CBNZxw_MARK(reg)                    \
+    j64 = GETMARK-(dyn->native_size);       \
+    CBNZxw(reg, j64)
+// Branch to MARK if reg is not 0 (use j64)
+#define CBNZw_MARK(reg)                     \
+    j64 = GETMARK-(dyn->native_size);       \
     CBNZw(reg, j64)
 // Test bit N of A and branch to MARK if not set
-#define TBZ_MARK(A, N) \
-    j64 = GETMARK-(dyn->native_size); \
+#define TBZ_MARK(A, N)                      \
+    j64 = GETMARK-(dyn->native_size);       \
     TBZ(A, N, j64)
 // Test bit N of A and branch to MARK if set
-#define TBNZ_MARK(A, N) \
-    j64 = GETMARK-(dyn->native_size); \
+#define TBNZ_MARK(A, N)                     \
+    j64 = GETMARK-(dyn->native_size);       \
     TBNZ(A, N, j64)
 // Branch to MARK2 if cond (use j64)
-#define B_MARK2(cond) \
-    j64 = GETMARK2-(dyn->native_size); \
+#define B_MARK2(cond)                       \
+    j64 = GETMARK2-(dyn->native_size);      \
     Bcond(cond, j64)
 // Branch to MARK2 unconditionnal (use j64)
-#define B_MARK2_nocond \
-    j64 = GETMARK2-(dyn->native_size); \
+#define B_MARK2_nocond                      \
+    j64 = GETMARK2-(dyn->native_size);      \
     B(j64)
 // Branch to MARK2 if reg is 0 (use j64)
-#define CBZx_MARK2(reg) \
-    j64 = GETMARK2-(dyn->native_size); \
+#define CBZx_MARK2(reg)                     \
+    j64 = GETMARK2-(dyn->native_size);      \
     CBZx(reg, j64)
 // Branch to MARK2 if reg is not 0 (use j64)
-#define CBNZx_MARK2(reg) \
-    j64 = GETMARK2-(dyn->native_size); \
+#define CBNZx_MARK2(reg)                    \
+    j64 = GETMARK2-(dyn->native_size);      \
     CBNZx(reg, j64)
+#define CBNZxw_MARK2(reg)                   \
+    j64 = GETMARK2-(dyn->native_size);      \
+    CBNZxw(reg, j64)
 // Test bit N of A and branch to MARK2 if set
-#define TBNZ_MARK2(A, N) \
-    j64 = GETMARK2-(dyn->native_size); \
+#define TBNZ_MARK2(A, N)                    \
+    j64 = GETMARK2-(dyn->native_size);      \
     TBNZ(A, N, j64)
 // Branch to MARK3 if cond (use j64)
-#define B_MARK3(cond) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define B_MARK3(cond)                       \
+    j64 = GETMARK3-(dyn->native_size);      \
     Bcond(cond, j64)
 // Test bit N of A and branch to MARK3 if not set
-#define TBZ_MARK2(A, N) \
-    j64 = GETMARK2-(dyn->native_size); \
+#define TBZ_MARK2(A, N)                     \
+    j64 = GETMARK2-(dyn->native_size);      \
     TBZ(A, N, j64)
 // Branch to MARK3 unconditionnal (use j64)
-#define B_MARK3_nocond \
-    j64 = GETMARK3-(dyn->native_size); \
+#define B_MARK3_nocond                      \
+    j64 = GETMARK3-(dyn->native_size);      \
     B(j64)
 // Branch to MARK3 if reg is not 0 (use j64)
-#define CBNZx_MARK3(reg) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBNZx_MARK3(reg)                    \
+    j64 = GETMARK3-(dyn->native_size);      \
     CBNZx(reg, j64)
 // Branch to MARK3 if reg is not 0 (use j64)
-#define CBNZw_MARK3(reg) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBNZw_MARK3(reg)                    \
+    j64 = GETMARK3-(dyn->native_size);      \
     CBNZw(reg, j64)
 // Branch to MARK3 if reg is 0 (use j64)
-#define CBZx_MARK3(reg) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBZx_MARK3(reg)                     \
+    j64 = GETMARK3-(dyn->native_size);      \
     CBZx(reg, j64)
// Branch to MARK3 if reg is 0 (use j64)
-#define CBZw_MARK3(reg) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define CBZw_MARK3(reg)                     \
+    j64 = GETMARK3-(dyn->native_size);      \
     CBZw(reg, j64)
 // Test bit N of A and branch to MARK3 if not set
-#define TBZ_MARK3(A, N) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define TBZ_MARK3(A, N)                     \
+    j64 = GETMARK3-(dyn->native_size);      \
     TBZ(A, N, j64)
 // Test bit N of A and branch to MARK3 if set
-#define TBNZ_MARK3(A, N) \
-    j64 = GETMARK3-(dyn->native_size); \
+#define TBNZ_MARK3(A, N)                    \
+    j64 = GETMARK3-(dyn->native_size);      \
     TBNZ(A, N, j64)
 // Branch to next instruction if cond (use j64)
 #define B_NEXT(cond) \
@@ -851,6 +862,14 @@
 #define CBZx_NEXT(reg) \
     j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
     CBZx(reg, j64)
+// Branch to next instruction if reg is 0 (use j64)
+#define CBZxw_NEXT(reg) \
+    j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
+    CBZxw(reg, j64)
+// Branch to next instruction if reg is not 0 (use j64)
+#define CBNZw_NEXT(reg) \
+    j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
+    CBNZw(reg, j64)
 // Branch to next instruction if reg is not 0 (use j64)
 #define CBNZx_NEXT(reg) \
     j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
@@ -1295,6 +1314,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shl32 STEPNAME(emit_shl32)
 #define emit_shl32c STEPNAME(emit_shl32c)
 #define emit_shr32 STEPNAME(emit_shr32)
 #define emit_shr32c STEPNAME(emit_shr32c)
+#define emit_sar32 STEPNAME(emit_sar32)
 #define emit_sar32c STEPNAME(emit_sar32c)
 #define emit_shl8 STEPNAME(emit_shl8)
 #define emit_shl8c STEPNAME(emit_shl8c)
@@ -1459,6 +1479,7 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
 void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_sar32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shl8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
-- 
cgit 1.4.1