diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-11-13 13:31:32 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-11-13 13:31:32 +0100 |
| commit | 937e2cf853255dd636388134c882c6277ce74552 (patch) | |
| tree | 2acd0bc5744f173cc3426a8cdf850345732f890c /src | |
| parent | 82bf71ee19a09ceef7b45bf640d2670c492f4347 (diff) | |
| download | box64-937e2cf853255dd636388134c882c6277ce74552.tar.gz box64-937e2cf853255dd636388134c882c6277ce74552.zip | |
[ARM64_DYNAREC] Improved 32/64bits imul/mul opcodes flags handling
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 87 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 31 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 104 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 62 |
4 files changed, 184 insertions, 100 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index e06cd2d6..0e4b22c2 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -822,7 +822,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0x69: INST_NAME("IMUL Gd, Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; GETGD; GETED(4); @@ -833,13 +833,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SMULH(x3, ed, x4); MULx(gd, ed, x4); - IFX(X_PEND) { - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x1, d_imul64); - } else { - SET_DFNONE(x1); - } + SET_DFNONE(x1); IFX(X_ZF | X_PF | X_AF | X_SF) { MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); BICw(xFlags, xFlags, x1); @@ -864,13 +858,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMULL(gd, ed, x4); LSRx(x3, gd, 32); MOVw_REG(gd, gd); - IFX(X_PEND) { - UFLAG_RES(gd); - UFLAG_OP1(x3); - UFLAG_DF(x1, d_imul32); - } else { - SET_DFNONE(x1); - } + SET_DFNONE(x1); IFX(X_ZF | X_PF | X_AF | X_SF) { MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); BICw(xFlags, xFlags, x1); @@ -901,7 +889,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; GETGD; GETED(1); @@ -912,13 +900,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SMULH(x3, ed, x4); MULx(gd, ed, x4); - IFX(X_PEND) { - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x1, d_imul64); - } else { - SET_DFNONE(x1); - } + SET_DFNONE(x1); IFX(X_ZF | X_PF | X_AF | X_SF) { MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); BICw(xFlags, xFlags, x1); @@ -943,13 +925,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMULL(gd, ed, x4); LSRx(x3, gd, 32); MOVw_REG(gd, gd); - IFX(X_PEND) { - UFLAG_RES(gd); - UFLAG_OP1(x3); - UFLAG_DF(x1, d_imul32); - } else { - SET_DFNONE(x1); - } + SET_DFNONE(x1); IFX(X_ZF | X_PF | X_AF | X_SF) { MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); BICw(xFlags, xFlags, x1); @@ -3272,7 +3248,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EBBACK; break; case 4: - INST_NAME("MUL AL, Ed"); + INST_NAME("MUL AL, Eb"); SETFLAGS(X_ALL, SF_PENDING); GETEB(x1, 0); UXTBw(x2, xRAX); @@ -3363,7 +3339,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETED(0); if(rex.w) { if(ed==xRDX) gd=x3; else gd=xRDX; @@ -3375,13 +3351,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, xRDX); LSRx(xRDX, xRDX, 32); } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + CMPSxw_U12(xRDX, 0); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 5: INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETED(0); if(rex.w) { if(ed==xRDX) gd=x3; else gd=xRDX; @@ -3393,9 +3385,26 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, xRDX); LSRx(xRDX, xRDX, 32); } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRxw(x4, xRAX, rex.w?63:31); + CMPSxw_REG(xRDX, x4); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 6: INST_NAME("DIV Ed"); diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 0c1ba5e3..9034398c 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -1855,7 +1855,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xAF: INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; GETGD; GETED(0); @@ -1864,13 +1864,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SMULH(x3, gd, ed); MULx(gd, gd, ed); - IFX(X_PEND) { - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x4, d_imul64); - } else IFX(X_CF|X_OF) { - SET_DFNONE(x4); - } + SET_DFNONE(x4); IFX(X_CF|X_OF) { ASRx(x4, gd, 63); CMPSx_REG(x3, x4); @@ -1891,13 +1885,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMULL(gd, gd, ed); LSRx(x3, gd, 32); MOVw_REG(gd, gd); - IFX(X_PEND) { - UFLAG_RES(gd); - UFLAG_OP1(x3); - UFLAG_DF(x4, d_imul32); - } else IFX(X_CF|X_OF) { - SET_DFNONE(x4); - } + SET_DFNONE(x4); IFX(X_CF|X_OF) { ASRw(x4, gd, 31); CMPSw_REG(x3, x4); @@ -1909,17 +1897,16 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIw(xFlags, x3, F_OF, 1); } } - if(box64_dynarec_test) { - // to avoid noise during test - BFCw(xFlags, F_AF, 1); - BFCw(xFlags, F_PF, 1); - BFCw(xFlags, F_ZF, 1); - BFCw(xFlags, F_SF, 1); - } } else { MULxw(gd, gd, ed); } } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x1); + } break; case 0xB1: diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index 28078fc6..1fcca3e6 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -271,7 +271,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin switch(rep) { case 0: INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; grab_segdata(dyn, addr, ninst, x4, seg); GETGD; @@ -281,9 +281,18 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SMULH(x3, gd, ed); MULx(gd, gd, ed); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRx(x4, x3, 63); + CMPSx_REG(x3, x4); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } } else { MULxw(gd, gd, ed); } @@ -293,13 +302,29 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMULL(gd, gd, ed); UFLAG_RES(gd); LSRx(x3, gd, 32); - UFLAG_OP1(x3); MOVw_REG(gd, gd); - UFLAG_DF(x3, d_imul32); + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRw(x4, gd, 31); + CMPSw_REG(x3, x4); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } } else { MULxw(gd, gd, ed); } } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x1); + } break; default: DEFAULT; @@ -509,7 +534,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x69: INST_NAME("IMUL Gd, Ed, Id"); - SETFLAGS(X_ALL, SF_SET_PENDING); + SETFLAGS(X_ALL, SF_SET); nextop = F8; grab_segdata(dyn, addr, ninst, x4, seg); GETGD; @@ -521,13 +546,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SMULH(x3, ed, x4); MULx(gd, ed, x4); - IFX(X_PEND) { - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x1, d_imul64); - } else { - SET_DFNONE(x1); - } + SET_DFNONE(x1); IFX(X_ZF | X_PF | X_AF | X_SF) { MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); BICw(xFlags, xFlags, x1); @@ -552,13 +571,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMULL(gd, ed, x4); LSRx(x3, gd, 32); MOVw_REG(gd, gd); - IFX(X_PEND) { - UFLAG_RES(gd); - UFLAG_OP1(x3); - UFLAG_DF(x1, d_imul32); - } else { - SET_DFNONE(x1); - } + SET_DFNONE(x1); IFX(X_ZF | X_PF | X_AF | X_SF) { MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); BICw(xFlags, xFlags, x1); @@ -1305,7 +1318,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETEDO(x6, 0); if(rex.w) { if(ed==xRDX) gd=x3; else gd=xRDX; @@ -1317,13 +1330,29 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, xRDX); LSRx(xRDX, xRDX, 32); } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + CMPSxw_U12(xRDX, 0); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 5: INST_NAME("IMUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETEDO(x6, 0); if(rex.w) { if(ed==xRDX) gd=x3; else gd=xRDX; @@ -1335,9 +1364,26 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, xRDX); LSRx(xRDX, xRDX, 32); } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + ASRxw(x4, xRAX, rex.w?63:31); + CMPSxw_REG(xRDX, x4); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 6: INST_NAME("DIV Ed"); diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index e5a4e613..c3df1bf6 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -932,9 +932,22 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { SMULH(x3, ed, x4); MULx(gd, ed, x4); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); + SET_DFNONE(x1); + IFX(X_ZF | X_PF | X_AF | X_SF) { + MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x1); + } + IFX(X_CF | X_OF) { + ASRx(x4, gd, 63); + CMPSx_REG(x3, x4); + CSETw(x1, cNE); + IFX(X_CF) { + BFIw(xFlags, x1, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x1, F_OF, 1); + } + } } else { MULxw(gd, ed, x4); } @@ -942,11 +955,24 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // 32bits imul UFLAG_IF { SMULL(gd, ed, x4); - UFLAG_RES(gd); LSRx(x3, gd, 32); - UFLAG_OP1(x3); - UFLAG_DF(x3, d_imul32); MOVw_REG(gd, gd); + SET_DFNONE(x1); + IFX(X_ZF | X_PF | X_AF | X_SF) { + MOV32w(x1, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x1); + } + IFX(X_CF | X_OF) { + ASRw(x4, gd, 31); + CMPSw_REG(x3, x4); + CSETw(x1, cNE); + IFX(X_CF) { + BFIw(xFlags, x1, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x1, F_OF, 1); + } + } } else { MULxw(gd, ed, x4); } @@ -1438,7 +1464,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 4: INST_NAME("MUL EAX, Ed"); - SETFLAGS(X_ALL, SF_PENDING); + SETFLAGS(X_ALL, SF_SET); GETED32(0); if(rex.w) { if(ed==xRDX) gd=x3; else gd=xRDX; @@ -1450,9 +1476,25 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, xRDX); LSRx(xRDX, xRDX, 32); } - UFLAG_RES(xRAX); - UFLAG_OP1(xRDX); - UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + UFLAG_IF { + SET_DFNONE(x4); + IFX(X_CF|X_OF) { + CMPSxw_U12(xRDX, 0); + CSETw(x3, cNE); + IFX(X_CF) { + BFIw(xFlags, x3, F_CF, 1); + } + IFX(X_OF) { + BFIw(xFlags, x3, F_OF, 1); + } + } + IFX(X_AF | X_PF | X_ZF | X_SF) + if(box64_dynarec_test) { + // to avoid noise during test + MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF)); + BICw(xFlags, xFlags, x3); + } + } break; case 5: INST_NAME("IMUL EAX, Ed"); |