about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-08-08 14:27:35 +0800
committer GitHub <noreply@github.com> 2024-08-08 08:27:35 +0200
commit d8ac8a5f1c3de529a5d147c0d4d6d97c4a96ffa7 (patch)
tree 42ed34888df3104f6db5e6720bfba6552b28d57c /src
parent 18954abf54c75ab19a93cb51f79d67cd33a93d5a (diff)
download box64-d8ac8a5f1c3de529a5d147c0d4d6d97c4a96ffa7.tar.gz
box64-d8ac8a5f1c3de529a5d147c0d4d6d97c4a96ffa7.zip
[LA64_DYNAREC] Fixed flag generation in IMUL/MUL opcode (#1716)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 76
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 4
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 85
3 files changed, 100 insertions, 65 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index e1483ba6..90824596 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -421,7 +421,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CMP EAX, Id");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             i64 = F32S;
-            if(i64) {
+            if (i64) {
                 MOV64xw(x2, i64);
                 emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5, x6);
             } else
@@ -497,9 +497,9 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x68:
             INST_NAME("PUSH Id");
             i64 = F32S;
-            if(PK(0)==0xC3) {
+            if (PK(0) == 0xC3) {
                 MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n");
-                TABLE64(x3, addr-4);
+                TABLE64(x3, addr - 4);
                 LD_W(x1, x3, 0);
                 PUSH1z(x1);
             } else {
@@ -624,7 +624,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
         case 0x80:
             nextop = F8;
-            switch((nextop>>3)&7) {
+            switch ((nextop >> 3) & 7) {
                 case 0: // ADD
                     INST_NAME("ADD Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -679,7 +679,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    if(u8) {
+                    if (u8) {
                         ADDI_D(x2, xZR, u8);
                         emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5, x6);
                     } else {
@@ -702,7 +702,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETED((opcode == 0x81) ? 4 : 1);
-                    if (opcode == 0x81) i64 = F32S; else i64 = F8S;
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
                     emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6);
                     WBACK;
                     break;
@@ -714,7 +717,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETED((opcode == 0x81) ? 4 : 1);
-                    if (opcode == 0x81) i64 = F32S; else i64 = F8S;
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
                     emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
                     WBACK;
                     break;
@@ -743,7 +749,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETED((opcode == 0x81) ? 4 : 1);
-                    if (opcode == 0x81) i64 = F32S; else i64 = F8S;
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
                     emit_and32c(dyn, ninst, rex, ed, i64, x3, x4);
                     WBACK;
                     break;
@@ -802,7 +811,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETED((opcode == 0x81) ? 4 : 1);
-                    if (opcode == 0x81) i64 = F32S; else i64 = F8S;
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
                     if (i64) {
                         MOV64xw(x2, i64);
                         emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6);
@@ -821,7 +833,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x84:
             INST_NAME("TEST Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
-            nextop=F8;
+            nextop = F8;
             GETEB(x1, 0);
             GETGB(x2);
             emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
@@ -1056,7 +1068,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             break;
         case 0xA0:
             INST_NAME("MOV AL,Ob");
-            if(rex.is32bits) u64 = F32; else u64 = F64;
+            if (rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
             MOV64z(x1, u64);
             LD_BU(x2, x1, 0);
             BSTRINS_D(xRAX, x2, 7, 0);
@@ -1521,7 +1536,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (box64_dynarec_safeflags) {
                 READFLAGS(X_PEND); // lets play safe here too
             }
-            fpu_purgecache(dyn, ninst, 1, x1, x2, x3);  // using next, even if there no next
+            fpu_purgecache(dyn, ninst, 1, x1, x2, x3); // using next, even if there no next
             i32 = F16;
             retn_to_epilog(dyn, ninst, rex, i32);
             *need_epilog = 0;
@@ -1533,7 +1548,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (box64_dynarec_safeflags) {
                 READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost
             }
-            fpu_purgecache(dyn, ninst, 1, x1, x2, x3);  // using next, even if there no next
+            fpu_purgecache(dyn, ninst, 1, x1, x2, x3); // using next, even if there no next
             ret_to_epilog(dyn, ninst, rex);
             *need_epilog = 0;
             *ok = 0;
@@ -1546,7 +1561,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 if (!rex.rex) {
                     ed = (nextop & 7);
                     eb1 = TO_LA64((ed & 3)); // Ax, Cx, Dx or Bx
-                    eb2 = (ed & 4) >> 2;   // L or H
+                    eb2 = (ed & 4) >> 2;     // L or H
                 } else {
                     eb1 = TO_LA64((nextop & 7) + (rex.b << 3));
                     eb2 = 0;
@@ -1711,7 +1726,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             break;
         case 0xD3:
             nextop = F8;
-            switch((nextop>>3)&7) {
+            switch ((nextop >> 3) & 7) {
                 case 0:
                     INST_NAME("ROL Ed, CL");
                     SETFLAGS(X_OF | X_CF, SF_SUBSET);
@@ -1766,6 +1781,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+
         #define GO(Z)                                                                               \
             BARRIER(BARRIER_MAYBE);                                                                 \
             JUMP(addr + i8, 1);                                                                     \
@@ -1795,12 +1811,16 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     BNE(xRCX, xZR, i32);                                                            \
                 };                                                                                  \
             }
+
         case 0xE0:
             INST_NAME("LOOPNZ");
             READFLAGS(X_ZF);
             i8 = F8S;
             ADDI_D(xRCX, xRCX, -1);
-            if (la64_lbt) X64_GET_EFLAGS(x1, X_ZF); else ANDI(x1, xFlags, 1 << F_ZF);
+            if (la64_lbt)
+                X64_GET_EFLAGS(x1, X_ZF);
+            else
+                ANDI(x1, xFlags, 1 << F_ZF);
             CBNZ_NEXT(x1);
             GO(0);
             break;
@@ -1809,7 +1829,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_ZF);
             i8 = F8S;
             ADDI_D(xRCX, xRCX, -1);
-            if (la64_lbt) X64_GET_EFLAGS(x1, X_ZF); else ANDI(x1, xFlags, 1 << F_ZF);
+            if (la64_lbt)
+                X64_GET_EFLAGS(x1, X_ZF);
+            else
+                ANDI(x1, xFlags, 1 << F_ZF);
             CBZ_NEXT(x1);
             GO(0);
             break;
@@ -1825,22 +1848,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GO(1);
             break;
         #undef GO
+
         case 0xE8:
             INST_NAME("CALL Id");
             i32 = F32S;
             if (addr + i32 == 0) {
-                #if STEP == 3
+#if STEP == 3
                 printf_log(LOG_INFO, "Warning, CALL to 0x0 at %p (%p)\n", (void*)addr, (void*)(addr - 1));
-                #endif
+#endif
             }
-            #if STEP < 2
+#if STEP < 2
             if (!rex.is32bits && isNativeCall(dyn, addr + i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn))
                 tmp = dyn->insts[ninst].pass2choice = 3;
             else
                 tmp = dyn->insts[ninst].pass2choice = 0;
-            #else
+#else
             tmp = dyn->insts[ninst].pass2choice;
-            #endif
+#endif
             switch (tmp) {
                 case 3:
                     SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state
@@ -2006,12 +2030,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 4:
                     INST_NAME("MUL AL, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_mul8);
                     GETEB(x1, 0);
                     ANDI(x2, xRAX, 0xff);
                     MUL_W(x1, x2, x1);
                     UFLAG_RES(x1);
                     BSTRINS_D(xRAX, x1, 15, 0);
+                    UFLAG_DF(x1, d_mul8);
                     break;
                 case 6:
                     INST_NAME("DIV Eb");
@@ -2039,7 +2063,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("NOT Ed");
                     GETED(0);
                     NOR(ed, ed, xZR);
-                    if(!rex.w && MODREG)
+                    if (!rex.w && MODREG)
                         ZEROUP(ed);
                     WBACK;
                     break;
@@ -2053,7 +2077,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 4:
                     INST_NAME("MUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w ? d_mul64 : d_mul32);
                     GETED(0);
                     if (rex.w) {
                         if (ed == xRDX)
@@ -2075,11 +2098,11 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w ? d_mul64 : d_mul32);
                     break;
                 case 5:
                     INST_NAME("IMUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w ? d_imul64 : d_imul32);
                     GETSED(0);
                     if (rex.w) {
                         if (ed == xRDX)
@@ -2097,6 +2120,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w ? d_imul64 : d_imul32);
                     break;
                 case 6:
                     INST_NAME("DIV Ed");
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 095512d5..1cf3b2f9 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -446,7 +446,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 B##NO(x1, 4 + 4);                                                                    \
             BSTRINS_D(gd, ed, 15, 0);
 
-        GOCOND(0x40, "CMOV", "Gd, Ed");
+            GOCOND(0x40, "CMOV", "Gd, Ed");
         #undef GO
 
         case 0x54:
@@ -996,13 +996,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("IMUL Gw,Ew");
             SETFLAGS(X_ALL, SF_PENDING);
             nextop = F8;
-            UFLAG_DF(x1, d_imul16);
             GETSEW(x1, 0);
             GETSGW(x2);
             MUL_W(x2, x2, x1);
             UFLAG_RES(x2);
             BSTRPICK_D(x2, x2, 15, 0);
             GWBACK;
+            UFLAG_DF(x1, d_imul16);
             break;
         case 0xBE:
             INST_NAME("MOVSX Gw, Eb");
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 9bf58812..4e2d8653 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -575,47 +575,57 @@
     LOAD_REG(R14);      \
     LOAD_REG(R15);
 
-#define SET_DFNONE()                             \
-    do {                                         \
-    dyn->f.dfnone_here=1;                        \
-    if (!dyn->f.dfnone) {                        \
-        ST_W(xZR, xEmu, offsetof(x64emu_t, df)); \
-        dyn->f.dfnone = 1;                       \
-    } } while(0);
-#define SET_DF(S, N)                           \
-    if ((N) != d_none) {                       \
-        MOV32w(S, (N));                        \
-        ST_W(S, xEmu, offsetof(x64emu_t, df)); \
-        dyn->f.dfnone = 0;                     \
-    } else                                     \
+#define SET_DFNONE()                                 \
+    do {                                             \
+        dyn->f.dfnone_here = 1;                      \
+        if (!dyn->f.dfnone) {                        \
+            ST_W(xZR, xEmu, offsetof(x64emu_t, df)); \
+            dyn->f.dfnone = 1;                       \
+        }                                            \
+    } while (0);
+
+#define SET_DF(S, N)                                           \
+    if ((N) != d_none) {                                       \
+        MOV32w(S, (N));                                        \
+        ST_W(S, xEmu, offsetof(x64emu_t, df));                 \
+        if (dyn->f.pending == SF_PENDING                       \
+            && dyn->insts[ninst].x64.need_after                \
+            && !(dyn->insts[ninst].x64.need_after & X_PEND)) { \
+            CALL_(UpdateFlags, -1, 0);                         \
+            dyn->f.pending = SF_SET;                           \
+            SET_NODF();                                        \
+        }                                                      \
+        dyn->f.dfnone = 0;                                     \
+    } else                                                     \
         SET_DFNONE()
+
 #define SET_NODF() dyn->f.dfnone = 0
-#define SET_DFOK() dyn->f.dfnone = 1; dyn->f.dfnone_here=1
+#define SET_DFOK()     \
+    dyn->f.dfnone = 1; \
+    dyn->f.dfnone_here = 1
 
-#define CLEAR_FLAGS_(s) \
-    MOV64x(s, (1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); ANDN(xFlags, xFlags, s);
+#define CLEAR_FLAGS_(s)                                                                                       \
+    MOV64x(s, (1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); \
+    ANDN(xFlags, xFlags, s);
 
 #define CLEAR_FLAGS(s) \
-    IFX(X_ALL) { CLEAR_FLAGS_(s) }
+    IFX (X_ALL) { CLEAR_FLAGS_(s) }
 
 #define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width)     \
-    IFX(X_AF | X_CF | X_OF)                                           \
-    {                                                                 \
+    IFX (X_AF | X_CF | X_OF) {                                        \
         /* calc borrow chain */                                       \
         /* bc = (res & (~op1 | op2)) | (~op1 & op2) */                \
         OR(scratch1, op1_, op2);                                      \
         AND(scratch2, res, scratch1);                                 \
         AND(op1_, op1_, op2);                                         \
         OR(scratch2, scratch2, op1_);                                 \
-        IFX(X_AF)                                                     \
-        {                                                             \
+        IFX (X_AF) {                                                  \
             /* af = bc & 0x8 */                                       \
             ANDI(scratch1, scratch2, 8);                              \
             BEQZ(scratch1, 8);                                        \
             ORI(xFlags, xFlags, 1 << F_AF);                           \
         }                                                             \
-        IFX(X_CF)                                                     \
-        {                                                             \
+        IFX (X_CF) {                                                  \
             /* cf = bc & (1<<(width-1)) */                            \
             if ((width) == 8) {                                       \
                 ANDI(scratch1, scratch2, 0x80);                       \
@@ -626,8 +636,7 @@
             BEQZ(scratch1, 8);                                        \
             ORI(xFlags, xFlags, 1 << F_CF);                           \
         }                                                             \
-        IFX(X_OF)                                                     \
-        {                                                             \
+        IFX (X_OF) {                                                  \
             /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \
             SRLI_D(scratch1, scratch2, (width)-2);                    \
             SRLI_D(scratch2, scratch1, 1);                            \
@@ -639,7 +648,9 @@
     }
 
 #ifndef MAYSETFLAGS
-#define MAYSETFLAGS() do {} while (0)
+#define MAYSETFLAGS() \
+    do {              \
+    } while (0)
 #endif
 
 #ifndef READFLAGS
@@ -713,7 +724,7 @@
 #define ARCH_RESET()
 
 #if STEP < 2
-#define GETIP(A) TABLE64(0, 0)
+#define GETIP(A)  TABLE64(0, 0)
 #define GETIP_(A) TABLE64(0, 0)
 #else
 // put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later...
@@ -857,8 +868,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #define x87_restoreround  STEPNAME(x87_restoreround)
 #define sse_setround      STEPNAME(sse_setround)
-#define x87_forget       STEPNAME(x87_forget)
-#define sse_purge07cache STEPNAME(sse_purge07cache)
+#define x87_forget        STEPNAME(x87_forget)
+#define sse_purge07cache  STEPNAME(sse_purge07cache)
 #define sse_get_reg       STEPNAME(sse_get_reg)
 #define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
 #define sse_forget_reg    STEPNAME(sse_forget_reg)
@@ -1127,13 +1138,13 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
         }
 
 // Restore xFlags from LBT.eflags
-#define RESTORE_EFLAGS(s)               \
-    do {                                \
-        if (la64_lbt) {                 \
-            CLEAR_FLAGS_(s);            \
-            X64_GET_EFLAGS(s, X_ALL);   \
-            OR(xFlags, xFlags, s);      \
-        }                               \
+#define RESTORE_EFLAGS(s)             \
+    do {                              \
+        if (la64_lbt) {               \
+            CLEAR_FLAGS_(s);          \
+            X64_GET_EFLAGS(s, X_ALL); \
+            OR(xFlags, xFlags, s);    \
+        }                             \
     } while (0)
 
 // Spill xFlags to LBT.eflags
@@ -1144,6 +1155,6 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
         }                                  \
     } while (0)
 
-#define PURGE_YMM()    /* TODO */
+#define PURGE_YMM() /* TODO */
 
 #endif //__DYNAREC_LA64_HELPER_H__