about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-12-22 15:22:44 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-12-22 15:22:44 +0100
commit 1bb7d9efd88690bf8739f9d1aebd0d3476e7d90b (patch)
tree 50cfef8c1f2b81f9ee79920be9d117f3a5370936
parent 25b4dd1457ac5fc4a9921af1b098bdc78ace4d14 (diff)
download box64-1bb7d9efd88690bf8739f9d1aebd0d3476e7d90b.tar.gz
box64-1bb7d9efd88690bf8739f9d1aebd0d3476e7d90b.zip
[ARM64_DYNAREC] Reworked MUL/IMUL opcodes a bit
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 8
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 62
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 6
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66.c 71
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 24
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_67.c 32
6 files changed, 162 insertions, 41 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index c4679462..499751af 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -205,17 +205,25 @@ int convert_bitmask(uint64_t bitmask);
 
 #define SUBx_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBSx_REG(Rd, Rn, Rm)              FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBSx_REG_ASR(Rd, Rn, Rm, asr)     FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b10, Rm, asr, Rn, Rd)) // 64-bit SUBS, Rd = Rn - (Rm ASR #asr); shift field 0b10 selects ASR
 #define SUBx_REG_LSL(Rd, Rn, Rm, lsl)       EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd))
 #define SUBw_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBw_REG_LSL(Rd, Rn, Rm, lsl)       EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd))
 #define SUBSw_REG(Rd, Rn, Rm)              FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
 #define SUBSw_REG_LSL(Rd, Rn, Rm, lsl)     FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd))
+#define SUBSw_REG_LSR(Rd, Rn, Rm, lsr)     FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b01, Rm, lsr, Rn, Rd)) // 32-bit SUBS, Rd = Rn - (Rm LSR #lsr); shift field 0b01 selects LSR
+#define SUBSw_REG_ASR(Rd, Rn, Rm, asr)     FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b10, Rm, asr, Rn, Rd)) // 32-bit SUBS, Rd = Rn - (Rm ASR #asr)
 #define SUBxw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBz_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBSxw_REG(Rd, Rn, Rm)             FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBSxw_REG_ASR(Rd, Rn, Rm, asr)    FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b10, Rm, asr, Rn, Rd)) // SUBS with ASR-shifted Rm; operand width follows rex.w (first argument)
 #define CMPSx_REG(Rn, Rm)                   SUBSx_REG(xZR, Rn, Rm)
+#define CMPSx_REG_ASR(Rn, Rm, asr)          SUBSx_REG_ASR(xZR, Rn, Rm, asr) // 64-bit compare of Rn with (Rm ASR #asr); destination is xZR, so only the flags are kept
 #define CMPSw_REG(Rn, Rm)                   SUBSw_REG(wZR, Rn, Rm)
+#define CMPSw_REG_LSR(Rn, Rm, lsr)          SUBSw_REG_LSR(wZR, Rn, Rm, lsr) // 32-bit compare of Rn with (Rm LSR #lsr)
+#define CMPSw_REG_ASR(Rn, Rm, asr)          SUBSw_REG_ASR(wZR, Rn, Rm, asr) // 32-bit compare of Rn with (Rm ASR #asr)
 #define CMPSxw_REG(Rn, Rm)                  SUBSxw_REG(xZR, Rn, Rm)
+#define CMPSxw_REG_ASR(Rn, Rm, asr)         SUBSxw_REG_ASR(xZR, Rn, Rm, asr) // rex.w-sized compare of Rn with (Rm ASR #asr)
 #define NEGx_REG(Rd, Rm)                    SUBx_REG(Rd, xZR, Rm);
 #define NEGw_REG(Rd, Rm)                    SUBw_REG(Rd, wZR, Rm);
 #define NEGxw_REG(Rd, Rm)                   SUBxw_REG(Rd, xZR, Rm);
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 4abbaeaf..cafd5632 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -836,8 +836,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         BICw(xFlags, xFlags, x1);
                     }
                     IFX(X_CF | X_OF) {
-                        ASRx(x4, gd, 63);
-                        CMPSx_REG(x3, x4);
+                        CMPSx_REG_ASR(x3, gd, 63);
                         CSETw(x1, cNE);
                         IFX(X_CF) {
                             BFIw(xFlags, x1, F_CF, 1);
@@ -861,8 +860,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         BICw(xFlags, xFlags, x1);
                     }
                     IFX(X_CF | X_OF) {
-                        ASRw(x4, gd, 31);
-                        CMPSw_REG(x3, x4);
+                        CMPSw_REG_ASR(x3, gd, 31);
                         CSETw(x1, cNE);
                         IFX(X_CF) {
                             BFIw(xFlags, x1, F_CF, 1);
@@ -902,8 +900,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         BICw(xFlags, xFlags, x1);
                     }
                     IFX(X_CF | X_OF) {
-                        ASRx(x4, gd, 63);
-                        CMPSx_REG(x3, x4);
+                        CMPSx_REG_ASR(x3, gd, 63);
                         CSETw(x1, cNE);
                         IFX(X_CF) {
                             BFIw(xFlags, x1, F_CF, 1);
@@ -927,8 +924,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         BICw(xFlags, xFlags, x1);
                     }
                     IFX(X_CF | X_OF) {
-                        ASRw(x4, gd, 31);
-                        CMPSw_REG(x3, x4);
+                        CMPSw_REG_ASR(x3, gd, 31);
                         CSETw(x1, cNE);
                         IFX(X_CF) {
                             BFIw(xFlags, x1, F_CF, 1);
@@ -3294,23 +3290,58 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 4:
                     INST_NAME("MUL AL, Eb");
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET);
                     GETEB(x1, 0);
                     UXTBw(x2, xRAX);
                     MULw(x1, x2, x1);
-                    UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
-                    UFLAG_DF(x1, d_mul8);
+                    UFLAG_IF {
+                        SET_DFNONE(x4);
+                        IFX(X_CF|X_OF) {
+                            CMPSw_REG_LSR(xZR, x1, 8);
+                            CSETw(x3, cNE);
+                            IFX(X_CF) {
+                                BFIw(xFlags, x3, F_CF, 1);
+                            }
+                            IFX(X_OF) {
+                                BFIw(xFlags, x3, F_OF, 1);
+                            }
+                        }
+                        IFX(X_AF | X_PF | X_ZF | X_SF)
+                            if(box64_dynarec_test) {
+                                // to avoid noise during test
+                                MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
+                                BICw(xFlags, xFlags, x3);
+                            }
+                    }
                     break;
                 case 5:
                     INST_NAME("IMUL AL, Eb");
-                    SETFLAGS(X_ALL, SF_PENDING);
+                    SETFLAGS(X_ALL, SF_SET);
                     GETSEB(x1, 0);
                     SXTBw(x2, xRAX);
                     MULw(x1, x2, x1);
-                    UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
-                    UFLAG_DF(x1, d_imul8);
+                    UFLAG_IF {
+                        SET_DFNONE(x4);
+                        IFX(X_CF|X_OF) {
+                            SXTBw(x2, x1);      // x2 = AL (the truncated 8-bit result) sign-extended; x1 still holds the full signed product
+                            CMPSw_REG(x2, x1);  // CF/OF must be set when the product does not fit in a signed byte (e.g. 10*20=200), not only when AH is neither 0x00 nor 0xFF
+                            CSETw(x3, cNE);
+                            IFX(X_CF) {
+                                BFIw(xFlags, x3, F_CF, 1);
+                            }
+                            IFX(X_OF) {
+                                BFIw(xFlags, x3, F_OF, 1);
+                            }
+                        }
+                        IFX(X_AF | X_PF | X_ZF | X_SF)
+                            if(box64_dynarec_test) {
+                                // to avoid noise during test
+                                MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));
+                                BICw(xFlags, xFlags, x3);
+                            }
+                    }
                     break;
                 case 6:
                     INST_NAME("DIV Eb");
@@ -3442,8 +3473,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     UFLAG_IF {
                         SET_DFNONE(x4);
                         IFX(X_CF|X_OF) {
-                            ASRxw(x4, xRAX, rex.w?63:31);
-                            CMPSxw_REG(xRDX, x4);
+                            CMPSxw_REG_ASR(xRDX, xRAX, rex.w?63:31);
                             CSETw(x3, cNE);
                             IFX(X_CF) {
                                 BFIw(xFlags, x3, F_CF, 1);
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index c19f4857..e7375bf9 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1910,8 +1910,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MULx(gd, gd, ed);

                     SET_DFNONE(x4);

                     IFX(X_CF|X_OF) {

-                        ASRx(x4, gd, 63);

-                        CMPSx_REG(x3, x4);

+                        CMPSx_REG_ASR(x3, gd, 63);

                         CSETw(x3, cNE);

                         IFX(X_CF) {

                             BFIw(xFlags, x3, F_CF, 1);

@@ -1931,8 +1930,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOVw_REG(gd, gd);

                     SET_DFNONE(x4);

                     IFX(X_CF|X_OF) {

-                        ASRw(x4, gd, 31);

-                        CMPSw_REG(x3, x4);

+                        CMPSw_REG_ASR(x3, gd, 31);

                         CSETw(x3, cNE);

                         IFX(X_CF) {

                             BFIw(xFlags, x3, F_CF, 1);

diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index b50c0dae..b9e50d70 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -436,16 +436,34 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 INST_NAME("IMUL Gw,Ew,Ib");

             }

-            SETFLAGS(X_ALL, SF_PENDING);

+            SETFLAGS(X_ALL, SF_SET);

             nextop = F8;

             GETSEW(x1, (opcode==0x69)?2:1);

             if(opcode==0x69) i32 = F16S; else i32 = F8S;

             MOV32w(x2, i32);

             MULw(x2, x2, x1);

-            UFLAG_RES(x2);

             gd=x2;

             GWBACK;

-            UFLAG_DF(x1, d_imul16);

+            UFLAG_IF {

+                SET_DFNONE(x4);

+                IFX(X_CF|X_OF) {

+                    SXTHw(x1, x2);      // x1 = truncated 16-bit result sign-extended; x2 still holds the full 32-bit signed product

+                    CMPSw_REG(x1, x2);  // CF/OF must be set when the product does not fit in a signed 16-bit value (e.g. 0x8000 has a zero upper half but still overflows)

+                    CSETw(x3, cNE);

+                    IFX(X_CF) {

+                        BFIw(xFlags, x3, F_CF, 1);

+                    }

+                    IFX(X_OF) {

+                        BFIw(xFlags, x3, F_OF, 1);

+                    }

+                }

+                IFX(X_AF | X_PF | X_ZF | X_SF)

+                    if(box64_dynarec_test) {

+                        // to avoid noise during test

+                        MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));

+                        BICw(xFlags, xFlags, x3);

+                    }

+            }

             break;

         case 0x6A:

             INST_NAME("PUSH Ib");

@@ -1342,25 +1360,60 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

                 case 4:

                     INST_NAME("MUL AX, Ew");

-                    SETFLAGS(X_ALL, SF_PENDING);

+                    SETFLAGS(X_ALL, SF_SET);

                     GETEW(x1, 0);

                     UXTHw(x2, xRAX);

                     MULw(x1, x2, x1);

-                    UFLAG_RES(x1);

                     BFIz(xRAX, x1, 0, 16);

                     BFXILx(xRDX, x1, 16, 16);

-                    UFLAG_DF(x1, d_mul16);

+                    UFLAG_IF {

+                        SET_DFNONE(x4);

+                        IFX(X_CF|X_OF) {

+                            CMPSw_REG_LSR(xZR, x1, 16);

+                            CSETw(x3, cNE);

+                            IFX(X_CF) {

+                                BFIw(xFlags, x3, F_CF, 1);

+                            }

+                            IFX(X_OF) {

+                                BFIw(xFlags, x3, F_OF, 1);

+                            }

+                        }

+                        IFX(X_AF | X_PF | X_ZF | X_SF)

+                            if(box64_dynarec_test) {

+                                // to avoid noise during test

+                                MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));

+                                BICw(xFlags, xFlags, x3);

+                            }

+                    }

                     break;

                 case 5:

                     INST_NAME("IMUL AX, Ew");

-                    SETFLAGS(X_ALL, SF_PENDING);

+                    SETFLAGS(X_ALL, SF_SET);

                     GETSEW(x1, 0);

                     SXTHw(x2, xRAX);

                     MULw(x1, x2, x1);

-                    UFLAG_RES(x1);

                     BFIz(xRAX, x1, 0, 16);

                     BFXILx(xRDX, x1, 16, 16);

-                    UFLAG_DF(x1, d_imul16);

+                    UFLAG_IF {

+                        SET_DFNONE(x4);

+                        IFX(X_CF|X_OF) {

+                            SXTHw(x2, x1);      // x2 = AX (the truncated 16-bit result) sign-extended; x1 still holds the full 32-bit signed product

+                            CMPSw_REG(x2, x1);  // CF/OF must be set when DX is not the sign-extension of AX, i.e. the product does not fit in a signed 16-bit value

+                            CSETw(x3, cNE);

+                            IFX(X_CF) {

+                                BFIw(xFlags, x3, F_CF, 1);

+                            }

+                            IFX(X_OF) {

+                                BFIw(xFlags, x3, F_OF, 1);

+                            }

+                        }

+                        IFX(X_AF | X_PF | X_ZF | X_SF)

+                            if(box64_dynarec_test) {

+                                // to avoid noise during test

+                                MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));

+                                BICw(xFlags, xFlags, x3);

+                            }

+                    }

                     break;

                 case 6:

                     INST_NAME("DIV Ew");

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 8905218c..66b17e6e 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2444,14 +2444,32 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 

         case 0xAF:

             INST_NAME("IMUL Gw,Ew");

-            SETFLAGS(X_ALL, SF_PENDING);

+            SETFLAGS(X_ALL, SF_SET);

             nextop = F8;

             GETSEW(x1, 0);

             GETSGW(x2);

             MULw(x2, x2, x1);

-            UFLAG_RES(x2);

             GWBACK;

-            UFLAG_DF(x1, d_imul16);

+            UFLAG_IF {

+                SET_DFNONE(x4);

+                IFX(X_CF|X_OF) {

+                    SXTHw(x1, x2);      // x1 = truncated 16-bit result sign-extended; x2 still holds the full 32-bit signed product

+                    CMPSw_REG(x1, x2);  // CF/OF must be set when the product does not fit in a signed 16-bit value (e.g. 0x8000 has a zero upper half but still overflows)

+                    CSETw(x3, cNE);

+                    IFX(X_CF) {

+                        BFIw(xFlags, x3, F_CF, 1);

+                    }

+                    IFX(X_OF) {

+                        BFIw(xFlags, x3, F_OF, 1);

+                    }

+                }

+                IFX(X_AF | X_PF | X_ZF | X_SF)

+                    if(box64_dynarec_test) {

+                        // to avoid noise during test

+                        MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));

+                        BICw(xFlags, xFlags, x3);

+                    }

+            }

             break;

 

         case 0xB3:

diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 6855bf58..6bc96c2a 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -917,7 +917,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 

         case 0x6B:

             INST_NAME("IMUL Gd, Ed, Ib");

-            SETFLAGS(X_ALL, SF_PENDING);

+            SETFLAGS(X_ALL, SF_SET);

             nextop = F8;

             GETGD;

             GETED32(1);

@@ -934,8 +934,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         BICw(xFlags, xFlags, x1);

                     }

                     IFX(X_CF | X_OF) {

-                        ASRx(x4, gd, 63);

-                        CMPSx_REG(x3, x4);

+                        CMPSx_REG_ASR(x3, gd, 63);

                         CSETw(x1, cNE);

                         IFX(X_CF) {

                             BFIw(xFlags, x1, F_CF, 1);

@@ -959,8 +958,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         BICw(xFlags, xFlags, x1);

                     }

                     IFX(X_CF | X_OF) {

-                        ASRw(x4, gd, 31);

-                        CMPSw_REG(x3, x4);

+                        CMPSw_REG_ASR(x3, gd, 31);

                         CSETw(x1, cNE);

                         IFX(X_CF) {

                             BFIw(xFlags, x1, F_CF, 1);

@@ -1501,7 +1499,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

                 case 5:

                     INST_NAME("IMUL EAX, Ed");

-                    SETFLAGS(X_ALL, SF_PENDING);

+                    SETFLAGS(X_ALL, SF_SET);

                     GETED32(0);

                     if(rex.w) {

                         if(ed==xRDX) gd=x3; else gd=xRDX;

@@ -1513,9 +1511,25 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRAX, xRDX);

                         LSRx(xRDX, xRDX, 32);

                     }

-                    UFLAG_RES(xRAX);

-                    UFLAG_OP1(xRDX);

-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);

+                    UFLAG_IF {

+                        SET_DFNONE(x4);

+                        IFX(X_CF|X_OF) {

+                            CMPSxw_REG_ASR(xRDX, xRAX, rex.w?63:31);

+                            CSETw(x3, cNE);

+                            IFX(X_CF) {

+                                BFIw(xFlags, x3, F_CF, 1);

+                            }

+                            IFX(X_OF) {

+                                BFIw(xFlags, x3, F_OF, 1);

+                            }

+                        }

+                        IFX(X_AF | X_PF | X_ZF | X_SF)

+                            if(box64_dynarec_test) {

+                                // to avoid noise during test

+                                MOV32w(x3, (1<<F_ZF)|(1<<F_AF)|(1<<F_PF)|(1<<F_SF));

+                                BICw(xFlags, xFlags, x3);

+                            }

+                    }

                     break;

                 case 6:

                     INST_NAME("DIV Ed");