about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorYang Liu <liuyang22@iscas.ac.cn>2025-02-17 16:16:15 +0800
committerGitHub <noreply@github.com>2025-02-17 09:16:15 +0100
commit64313d5aa6a0bafe84ddda61fe7cd541f81b42e7 (patch)
treea5b3c55e288614d57063f0d8bc1012bad49409ca
parent71845a11bba58f353d5de837e0d7ef55d1c87052 (diff)
downloadbox64-64313d5aa6a0bafe84ddda61fe7cd541f81b42e7.tar.gz
box64-64313d5aa6a0bafe84ddda61fe7cd541f81b42e7.zip
[LA64_DYNAREC] Minor optims and fixes to some opcodes (#2371)
-rw-r--r--src/dynarec/la64/dynarec_la64_00.c59
-rw-r--r--src/dynarec/la64/dynarec_la64_0f.c8
-rw-r--r--src/dynarec/la64/dynarec_la64_64.c4
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_tests.c80
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.h4
5 files changed, 127 insertions, 28 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index ed708062..2b6c65ee 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -477,10 +477,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION);
             i64 = F32S;
             if (i64) {
-                MOV64xw(x2, i64);
+                MOV64x(x2, i64);
                 emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5, x6);
             } else
-                emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4);
+                emit_cmp32_0(dyn, ninst, rex, nextop, xRAX, x3, x4, x5);
             break;
         case 0x40:
         case 0x41:
@@ -946,15 +946,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     else
                         i64 = F8S;
                     if (i64) {
-                        MOV64xw(x2, i64);
+                        MOV64x(x2, i64);
                         emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6);
-                    } else {
-                        if (!rex.w && MODREG) {
-                            ZEROUP2(x1, ed);
-                            ed = x1;
-                        }
-                        emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
-                    }
+                    } else
+                        emit_cmp32_0(dyn, ninst, rex, nextop, ed, x3, x4, x5);
                     break;
             }
             break;
@@ -1179,7 +1174,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                LD_HU(x1, wback, fixedaddress);
+                LD_HU(x1, ed, fixedaddress);
                 ed = x1;
             }
             ST_H(ed, xEmu, offsetof(x64emu_t, segs[u8]));
@@ -2194,9 +2189,21 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 6:
                     INST_NAME("SHL Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
+                    if (!dyn->insts[ninst].x64.gen_flags) {
+                        GETED(0);
+                        SLL_D(ed, ed, xRCX);
+                        if (dyn->insts[ninst].nat_flags_fusion) {
+                            if (!rex.w) ZEROUP(ed);
+                            NAT_FLAGS_OPS(ed, xZR);
+                        } else if (!rex.w && MODREG) {
+                            ZEROUP(ed);
+                        }
+                        WBACK;
+                        break;
+                    }
                     ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
                     GETED(0);
-                    if (!rex.w && MODREG) { ZEROUP(ed); }
+                    if (!rex.w && MODREG) ZEROUP(ed);
                     CBZ_NEXT(x3);
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6);
                     WBACK;
@@ -2204,9 +2211,21 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 5:
                     INST_NAME("SHR Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
+                    if (!dyn->insts[ninst].x64.gen_flags) {
+                        GETED(0);
+                        SRL_D(ed, ed, xRCX);
+                        if (dyn->insts[ninst].nat_flags_fusion) {
+                            if (!rex.w) ZEROUP(ed);
+                            NAT_FLAGS_OPS(ed, xZR);
+                        } else if (!rex.w && MODREG) {
+                            ZEROUP(ed);
+                        }
+                        WBACK;
+                        break;
+                    }
                     ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
                     GETED(0);
-                    if (!rex.w && MODREG) { ZEROUP(ed); }
+                    if (!rex.w && MODREG) ZEROUP(ed);
                     CBZ_NEXT(x3);
                     emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
@@ -2498,10 +2517,18 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 1:
                     INST_NAME("TEST Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION);
-                    GETEB(x1, 1);
+                    if (MODREG && (rex.rex || (((nextop & 7) >> 2) == 0))) {
+                        // quick path for low 8bit registers
+                        if (rex.rex)
+                            ed = TO_NAT((nextop & 7) + (rex.b << 3));
+                        else
+                            ed = TO_NAT(nextop & 3);
+                    } else {
+                        GETEB(x1, 1);
+                        ed = x1;
+                    }
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
+                    emit_test8c(dyn, ninst, ed, u8, x3, x4, x5);
                     break;
                 case 2:
                     INST_NAME("NOT Eb");
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 192c9445..6bb5bb7a 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -494,7 +494,10 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             else                                                                                 \
                 B##NO(tmp1, 8);                                                                  \
         }                                                                                        \
-        MV(gd, ed);                                                                              \
+        if (rex.w)                                                                               \
+            MV(gd, ed);                                                                          \
+        else                                                                                     \
+            ZEROUP2(gd, ed);                                                                     \
     } else {                                                                                     \
         addr = geted(dyn, addr, ninst, nextop, &ed, tmp2, tmp3, &fixedaddress, rex, NULL, 1, 0); \
         if (dyn->insts[ninst].nat_flags_fusion) {                                                \
@@ -506,8 +509,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 B##NO(tmp1, 8);                                                                  \
         }                                                                                        \
         LDxw(gd, ed, fixedaddress);                                                              \
-    }                                                                                            \
-    if (!rex.w) ZEROUP(gd);
+    }
 
             GOCOND(0x40, "CMOV", "Gd, Ed");
 
diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c
index 04633316..2af080ef 100644
--- a/src/dynarec/la64/dynarec_la64_64.c
+++ b/src/dynarec/la64/dynarec_la64_64.c
@@ -323,10 +323,10 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     else
                         i64 = F8S;
                     if (i64) {
-                        MOV64xw(x2, i64);
+                        MOV64x(x2, i64);
                         emit_cmp32(dyn, ninst, rex, ed, x2, x3, x4, x5, x6);
                     } else
-                        emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
+                        emit_cmp32_0(dyn, ninst, rex, nextop, ed, x3, x4, x5);
                     break;
             }
             break;
diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c b/src/dynarec/la64/dynarec_la64_emit_tests.c
index 687a3f80..ea04f36a 100644
--- a/src/dynarec/la64/dynarec_la64_emit_tests.c
+++ b/src/dynarec/la64/dynarec_la64_emit_tests.c
@@ -354,7 +354,7 @@ void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 }
 
 // emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch
-void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4)
+void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, uint8_t nextop, int s1, int s3, int s4, int s5)
 {
     IFX_PENDOR0 {
         ST_D(s1, xEmu, offsetof(x64emu_t, op1));
@@ -380,10 +380,13 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
             else {
                 if (dyn->insts[ninst].nat_flags_needsign) {
                     SEXT_W(s3, s1);
-                } else {
+                    NAT_FLAGS_OPS(s3, xZR);
+                } else if (MODREG) {
                     ZEROUP2(s3, s1);
+                    NAT_FLAGS_OPS(s3, xZR);
+                } else {
+                    NAT_FLAGS_OPS(s1, xZR);
                 }
-                NAT_FLAGS_OPS(s3, xZR);
             }
         }
         return;
@@ -399,12 +402,19 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
         }
         ORI(xFlags, xFlags, 1 << F_SF);
     }
+    int res = s1;
+    IFX (X_ZF | X_PF) {
+        if (!rex.w && MODREG) {
+            ZEROUP2(s5, s1);
+            res = s5;
+        }
+    }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
+        BNEZ(res, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
-        emit_pf(dyn, ninst, s1, s3, s4);
+        emit_pf(dyn, ninst, res, s3, s4);
     }
     if (dyn->insts[ninst].nat_flags_fusion) {
         if (rex.w)
@@ -412,10 +422,13 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
         else {
             if (dyn->insts[ninst].nat_flags_needsign) {
                 SEXT_W(s3, s1);
+                NAT_FLAGS_OPS(s3, xZR);
+            } else if (res == s5) { // zero-up'd case
+                NAT_FLAGS_OPS(res, xZR);
             } else {
                 ZEROUP2(s3, s1);
+                NAT_FLAGS_OPS(s3, xZR);
             }
-            NAT_FLAGS_OPS(s3, xZR);
         }
     }
 }
@@ -474,6 +487,61 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
 }
 
+// emit TEST8 instruction, from test s1, c, using s3, s4 and s5 as scratch
+void emit_test8c(dynarec_la64_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5)
+{
+    IFX_PENDOR0 {
+        SET_DF(s3, d_tst8);
+    } else {
+        SET_DFNONE();
+    }
+
+    NAT_FLAGS_ENABLE_SIGN();
+    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR);
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            ADDI_D(s3, xZR, c);
+            X64_AND_B(s1, s3);
+        }
+
+        if (dyn->insts[ninst].nat_flags_fusion) {
+            ANDI(s3, s1, c);
+            if (dyn->insts[ninst].nat_flags_needsign) {
+                EXT_W_B(s3, s3);
+            }
+        }
+
+        IFX_PENDOR0 {
+            if (!dyn->insts[ninst].nat_flags_fusion) ANDI(s3, s1, c);
+            ST_B(s3, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    ANDI(s3, s1, c); // res = s1 & c
+
+    IFX_PENDOR0 {
+        ST_D(s3, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_SF) {
+        SRLI_D(s4, s3, 7);
+        BEQZ(s4, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_ZF) {
+        BNEZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s3, s4, s5);
+    }
+    if (dyn->insts[ninst].nat_flags_fusion && dyn->insts[ninst].nat_flags_needsign) {
+        EXT_W_B(s3, s3);
+    }
+}
+
 // emit TEST16 instruction, from test s1, s2, using s3, s4 and s5 as scratch
 void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index fc5ce861..b912fe8a 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -865,6 +865,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_cmp8           STEPNAME(emit_cmp8)
 #define emit_cmp8_0         STEPNAME(emit_cmp8_0)
 #define emit_test8          STEPNAME(emit_test8)
+#define emit_test8c         STEPNAME(emit_test8c)
 #define emit_test16         STEPNAME(emit_test16)
 #define emit_test32         STEPNAME(emit_test32)
 #define emit_test32c        STEPNAME(emit_test32c)
@@ -974,8 +975,9 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
-void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
+void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, uint8_t nextop, int s1, int s3, int s4, int s5);
 void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_test8c(dynarec_la64_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4, int s5);
 void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);