From 8868023ce96ec7eeede80065603bffde310469eb Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 13 Nov 2024 17:12:03 +0100 Subject: [ARM64_DYNAREC] Reworked 8/16/32/64bits AND opcodes --- src/dynarec/arm64/dynarec_arm64_66.c | 6 +- src/dynarec/arm64/dynarec_arm64_6664.c | 3 +- src/dynarec/arm64/dynarec_arm64_66f0.c | 20 +++- src/dynarec/arm64/dynarec_arm64_67.c | 3 +- src/dynarec/arm64/dynarec_arm64_emit_logic.c | 158 +++++++++++++-------------- src/dynarec/arm64/dynarec_arm64_helper.h | 2 +- 6 files changed, 94 insertions(+), 98 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index ac14430a..f9acee68 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -227,8 +227,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); i32 = F16; UXTHw(x1, xRAX); - MOV32w(x2, i32); - emit_and16(dyn, ninst, x1, x2, x3, x4); + emit_and16c(dyn, ninst, x1, i32, x3, x4); BFIz(xRAX, x1, 0, 16); break; @@ -523,8 +522,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); GETEW(x1, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_and16(dyn, ninst, x1, x5, x2, x4); + emit_and16c(dyn, ninst, x1, i16, x2, x4); EWBACK; break; case 5: //SUB diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c index 30a28012..36e4ae0d 100644 --- a/src/dynarec/arm64/dynarec_arm64_6664.c +++ b/src/dynarec/arm64/dynarec_arm64_6664.c @@ -183,8 +183,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n grab_segdata(dyn, addr, ninst, x1, seg); GETEWO(x1, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_and16(dyn, ninst, x1, x5, x2, x4); + emit_and16c(dyn, ninst, x1, i16, x2, x4); EWBACK; break; case 5: //SUB diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index c3311709..6d8f953b 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -404,27 +404,35 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { if(opcode==0x81) i32 = F16S; else i32 = F8S; ed = xRAX+(nextop&7)+(rex.b<<3); - MOV32w(x5, i32); UXTHw(x6, ed); - emit_and16(dyn, ninst, x6, x5, x3, x4); + emit_and16c(dyn, ninst, x6, i32, x3, x4); BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i32 = F16S; else i32 = F8S; + i64 = convert_bitmask_w(i32); if(arm64_atomics) { MOV32w(x5, ~i32); UFLAG_IF { LDCLRALH(x5, x1, wback); - MVNw_REG(x5, x5); - emit_and16(dyn, ninst, x1, x5, x3, x4); + if(i64) { + emit_and16c(dyn, ninst, x1, i32, x3, x4); + } else { + MVNw_REG(x5, x5); + emit_and16(dyn, ninst, x1, x5, x3, x4); + } } else { STCLRLH(x5, wback); } } else { - MOV32w(x5, i32); + if(!i64) {MOV32w(x5, i32);} MARKLOCK; LDAXRH(x1, wback); - emit_and16(dyn, ninst, x1, x5, x3, x4); + if(i64) { + emit_and16c(dyn, ninst, x1, i32, x3, x4); + } else { + emit_and16(dyn, ninst, x1, x5, x3, x4); + } STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); SMDMB(); diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index b6210534..4adac1d9 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -847,8 +847,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET_PENDING); GETEW32(x1, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; - MOVZw(x5, i16); - emit_and16(dyn, ninst, x1, x5, x2, x4); + emit_and16c(dyn, ninst, x1, i16, x2, x4); EWBACK; break; case 5: //SUB diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c index 8e6b9bff..f9921d8e 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c @@ -277,25 +277,21 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 // emit AND32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4) { + int mask = convert_bitmask_xw(c); + if(!mask) { + MOV64xw(s3, c); + emit_and32(dyn, ninst, rex, s1, s3, s3, s4); + return; + } IFX(X_PEND) { SET_DF(s4, rex.w?d_and64:d_and32); } else IFX(X_ALL) { SET_DFNONE(s4); } - int mask = convert_bitmask_xw(c); - if(mask) { - IFX(X_ALL) { - ANDSxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F); - } else { - ANDxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F); - } + IFX(X_ZF|X_SF|X_CF|X_OF) { + ANDSxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F); } else { - MOV64xw(s3, c); - IFX(X_ALL) { - ANDSxw_REG(s1, s1, s3); - } else { - ANDxw_REG(s1, s1, s3); - } + ANDxw_mask(s1, s1, (mask>>12)&1, mask&0x3F, (mask>>6)&0x3F); } IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); @@ -437,9 +433,9 @@ void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) { MAYUSE(s2); IFX(X_PEND) { - SET_DF(s3, d_and8); + SET_DF(s4, d_and8); } else IFX(X_ALL) { - SET_DFNONE(s3); + SET_DFNONE(s4); } IFX(X_ZF) { ANDSw_REG(s1, s1, s2); @@ -475,25 +471,21 @@ void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) // emit AND8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4) { + int mask = convert_bitmask_w(c); + if(!mask) { + MOV32w(s3, c); + emit_and8(dyn, ninst, s1, s3, s3, s4); + return; + } IFX(X_PEND) { SET_DF(s4, d_and8); } else IFX(X_ALL) { SET_DFNONE(s4); } - int mask = convert_bitmask_w(c); - if(mask) { - IFX(X_ZF|X_SF) { - ANDSw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); - } else { - ANDw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); - } + IFX(X_ZF) { + ANDSw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); } else { - MOV32w(s3, c&0xff); - IFX(X_ZF|X_SF) { - ANDSw_REG(s1, s1, s3); - } else { - ANDw_REG(s1, s1, s3); - } + ANDw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); } IFX(X_PEND) { STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); @@ -502,14 +494,16 @@ void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4 MOV32w(s3, (1<=0 && c<256) { -// IFX(X_ALL) { -// ANDS_IMM8(s1, s1, c); -// } else { -// AND_IMM8(s1, s1, c); -// } -// } else { -// IFX(X_PEND) {} else {MOVW(s3, c);} -// IFX(X_ALL) { -// ANDS_REG_LSL_IMM5(s1, s1, s3, 0); -// } else { -// AND_REG_LSL_IMM5(s1, s1, s3, 0); -// } -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_CF | X_AF | X_ZF) { -// BIC_IMM8(xFlags, xFlags, (1<>6)&0x3F); + } else { + ANDw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F); + } + IFX(X_PEND) { + STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<