diff options
| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-12-22 15:22:44 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-12-22 15:22:44 +0100 |
| commit | 1bb7d9efd88690bf8739f9d1aebd0d3476e7d90b (patch) | |
| tree | 50cfef8c1f2b81f9ee79920be9d117f3a5370936 | |
| parent | 25b4dd1457ac5fc4a9921af1b098bdc78ace4d14 (diff) | |
| download | box64-1bb7d9efd88690bf8739f9d1aebd0d3476e7d90b.tar.gz box64-1bb7d9efd88690bf8739f9d1aebd0d3476e7d90b.zip | |
[ARM64_DYNAREC] Reworked MUL/IMUL opcodes a bit
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 8 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 62 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 6 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66.c | 71 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 24 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 32 |
6 files changed, 162 insertions, 41 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index c4679462..499751af 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -205,17 +205,25 @@ int convert_bitmask(uint64_t bitmask); #define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBSx_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd)) +#define SUBSx_REG_ASR(Rd, Rn, Rm, asr) FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b10, Rm, asr, Rn, Rd)) #define SUBx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd)) #define SUBw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd)) #define SUBSw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd)) #define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd)) +#define SUBSw_REG_LSR(Rd, Rn, Rm, lsr) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b01, Rm, lsr, Rn, Rd)) +#define SUBSw_REG_ASR(Rd, Rn, Rm, asr) FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b10, Rm, asr, Rn, Rd)) #define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBSxw_REG(Rd, Rn, Rm) FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd)) +#define SUBSxw_REG_ASR(Rd, Rn, Rm, asr) FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b10, Rm, asr, Rn, Rd)) #define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm) +#define CMPSx_REG_ASR(Rn, Rm, asr) SUBSx_REG_ASR(xZR, Rn, Rm, asr) #define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm) +#define CMPSw_REG_LSR(Rn, Rm, lsr) SUBSw_REG_LSR(wZR, Rn, Rm, lsr) +#define CMPSw_REG_ASR(Rn, Rm, asr) SUBSw_REG_ASR(wZR, Rn, Rm, asr) #define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm) +#define CMPSxw_REG_ASR(Rn, Rm, asr) SUBSxw_REG_ASR(xZR, Rn, Rm, asr) #define NEGx_REG(Rd, Rm) SUBx_REG(Rd, xZR, Rm); #define NEGw_REG(Rd, Rm) 
SUBw_REG(Rd, wZR, Rm); #define NEGxw_REG(Rd, Rm) SUBxw_REG(Rd, xZR, Rm); diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 4abbaeaf..cafd5632 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -836,8 +836,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BICw(xFlags, xFlags, x1); } IFX(X_CF | X_OF) { - ASRx(x4, gd, 63); - CMPSx_REG(x3, x4); + CMPSx_REG_ASR(x3, gd, 63); CSETw(x1, cNE); IFX(X_CF) { BFIw(xFlags, x1, F_CF, 1); @@ -861,8 +860,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BICw(xFlags, xFlags, x1); } IFX(X_CF | X_OF) { - ASRw(x4, gd, 31); - CMPSw_REG(x3, x4); + CMPSw_REG_ASR(x3, gd, 31); CSETw(x1, cNE); IFX(X_CF) { BFIw(xFlags, x1, F_CF, 1); @@ -902,8 +900,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BICw(xFlags, xFlags, x1); } IFX(X_CF | X_OF) { - ASRx(x4, gd, 63); - CMPSx_REG(x3, x4); + CMPSx_REG_ASR(x3, gd, 63); CSETw(x1, cNE); IFX(X_CF) { BFIw(xFlags, x1, F_CF, 1); @@ -927,8 +924,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BICw(xFlags, xFlags, x1); } IFX(X_CF | X_OF) { - ASRw(x4, gd, 31); - CMPSw_REG(x3, x4); + CMPSw_REG_ASR(x3, gd, 31); CSETw(x1, cNE); IFX(X_CF) { BFIw(xFlags, x1, F_CF, 1); @@ -3294,23 +3290,58 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: INST_NAME("MUL AL, Eb"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETEB(x1, 0); UXTBw(x2, xRAX); MULw(x1, x2, x1); - UFLAG_RES(x1); BFIx(xRAX, x1, 0, 16); - UFLAG_DF(x1, d_mul8); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + CMPSw_REG_LSR(xZR, x1, 8); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, 
(1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 5: INST_NAME("IMUL AL, Eb"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETSEB(x1, 0); SXTBw(x2, xRAX); MULw(x1, x2, x1); - UFLAG_RES(x1); BFIx(xRAX, x1, 0, 16); - UFLAG_DF(x1, d_imul8); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRxw(x2, x1, 8); + CMPSw_REG_ASR(x2, x1, 16); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 6: INST_NAME("DIV Eb"); @@ -3442,8 +3473,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SET_DFNONE(x4); IFX(X_CF|X_OF) { - ASRxw(x4, xRAX, rex.w?63:31); - CMPSxw_REG(xRDX, x4); + CMPSxw_REG_ASR(xRDX, xRAX, rex.w?63:31); CSETw(x3, cNE); IFX(X_CF) { BFIw(xFlags, x3, F_CF, 1); diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index c19f4857..e7375bf9 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -1910,8 +1910,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MULx(gd, gd, ed); SET_DFNONE(x4); IFX(X_CF|X_OF) { - ASRx(x4, gd, 63); - CMPSx_REG(x3, x4); + CMPSx_REG_ASR(x3, gd, 63); CSETw(x3, cNE); IFX(X_CF) { BFIw(xFlags, x3, F_CF, 1); @@ -1931,8 +1930,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(gd, gd); SET_DFNONE(x4); IFX(X_CF|X_OF) { - ASRw(x4, gd, 31); - CMPSw_REG(x3, x4); + CMPSw_REG_ASR(x3, gd, 31); CSETw(x3, cNE); IFX(X_CF) { BFIw(xFlags, x3, F_CF, 1); diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index b50c0dae..b9e50d70 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c 
@@ -436,16 +436,34 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { INST_NAME("IMUL Gw,Ew,Ib"); } - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; GETSEW(x1, (opcode==0x69)?2:1); if(opcode==0x69) i32 = F16S; else i32 = F8S; MOV32w(x2, i32); MULw(x2, x2, x1); - UFLAG_RES(x2); gd=x2; GWBACK; - UFLAG_DF(x1, d_imul16); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRxw(x1, x2, 16); + CMPSw_REG_ASR(x1, x2, 31); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 0x6A: INST_NAME("PUSH Ib"); @@ -1342,25 +1360,60 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: INST_NAME("MUL AX, Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETEW(x1, 0); UXTHw(x2, xRAX); MULw(x1, x2, x1); - UFLAG_RES(x1); BFIz(xRAX, x1, 0, 16); BFXILx(xRDX, x1, 16, 16); - UFLAG_DF(x1, d_mul16); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + CMPSw_REG_LSR(xZR, x1, 16); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 5: INST_NAME("IMUL AX, Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETSEW(x1, 0); SXTHw(x2, xRAX); MULw(x1, x2, x1); - UFLAG_RES(x1); BFIz(xRAX, x1, 0, 16); BFXILx(xRDX, x1, 16, 16); - UFLAG_DF(x1, d_imul16); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRxw(x2, x1, 16); + CMPSw_REG_ASR(x2, x1, 31); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + 
} + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 6: INST_NAME("DIV Ew"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 8905218c..66b17e6e 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -2444,14 +2444,32 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0xAF: INST_NAME("IMUL Gw,Ew"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; GETSEW(x1, 0); GETSGW(x2); MULw(x2, x2, x1); - UFLAG_RES(x2); GWBACK; - UFLAG_DF(x1, d_imul16); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRw(x1, x2, 16); + CMPSw_REG_ASR(x1, x2, 31); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 0xB3: diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index 6855bf58..6bc96c2a 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -917,7 +917,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; GETGD; GETED32(1); @@ -934,8 +934,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BICw(xFlags, xFlags, x1); } IFX(X_CF | X_OF) { - ASRx(x4, gd, 63); - CMPSx_REG(x3, x4); + CMPSx_REG_ASR(x3, gd, 63); CSETw(x1, cNE); IFX(X_CF) { BFIw(xFlags, x1, F_CF, 1); @@ -959,8 +958,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BICw(xFlags, xFlags, x1); 
} IFX(X_CF | X_OF) { - ASRw(x4, gd, 31); - CMPSw_REG(x3, x4); + CMPSw_REG_ASR(x3, gd, 31); CSETw(x1, cNE); IFX(X_CF) { BFIw(xFlags, x1, F_CF, 1); @@ -1501,7 +1499,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 5: INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETED32(0); if(rex.w) { if(ed==xRDX) gd=x3; else gd=xRDX; @@ -1513,9 +1511,25 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, xRDX); LSRx(xRDX, xRDX, 32); } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + CMPSxw_REG_ASR(xRDX, xRAX, rex.w?63:31); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 6: INST_NAME("DIV Ed"); |