about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-05-26 16:39:19 +0800
committer GitHub <noreply@github.com> 2025-05-26 10:39:19 +0200
commit abd1ebb425d23e693847b5796ab207453e181bd6 (patch)
tree 1b99cce25bb60984812e664a857fbb7b36c97af4 /src
parent e994c651167c9ae3fab34ff5ffbeb8823baeffe9 (diff)
download box64-abd1ebb425d23e693847b5796ab207453e181bd6.tar.gz
box64-abd1ebb425d23e693847b5796ab207453e181bd6.zip
[RV64_DYNAREC] Minor nativeflags optim to LEA and CMOVcc opcodes (#2669)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 1
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 5
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_67.c 1
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 93
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 4
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 42
6 files changed, 118 insertions, 28 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 7551cce4..ec484941 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -495,6 +495,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             if (MODREG) { // reg <= reg? that's an invalid operation
                 DEFAULT;
             } else { // mem <= reg
+                SCRATCH_USAGE(0);
                 addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0);
                 if (gd != ed) {
                     MVxw(gd, ed);
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 9429b701..40de22ce 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -937,11 +937,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     if (MODREG) {                                                                                \
         ed = TO_NAT((nextop & 7) + (rex.b << 3));                                                \
         if (dyn->insts[ninst].nat_flags_fusion) {                                                \
-            NATIVEJUMP(NATNO, 8);                                                                \
+            NATIVEMV(NATYES, gd, ed);                                                            \
         } else {                                                                                 \
-            B##NO(tmp1, 8);                                                                      \
+            MV##YES(gd, ed, tmp1);                                                               \
         }                                                                                        \
-        MV(gd, ed);                                                                              \
         if (!rex.w) ZEROUP(gd);                                                                  \
     } else {                                                                                     \
         addr = geted(dyn, addr, ninst, nextop, &ed, tmp2, tmp3, &fixedaddress, rex, NULL, 1, 0); \
diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c
index aeb6535d..fcc1a1e9 100644
--- a/src/dynarec/rv64/dynarec_rv64_67.c
+++ b/src/dynarec/rv64/dynarec_rv64_67.c
@@ -739,6 +739,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (MODREG) { // reg <= reg? that's an invalid operation
                 DEFAULT;
             } else { // mem <= reg
+                SCRATCH_USAGE(0);
                 addr = geted32(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0);
                 ZEXTW2(gd, ed);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 8de757df..a6a047ca 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -66,7 +66,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
                     } else {
                         if (sib >> 6) {
                             SLLI(ret, TO_NAT(sib_reg), (sib >> 6));
-                            SCRATCH_USAGE(1);
+                            SCRATCH_USAGE(!IS_GPR(ret));
                         } else
                             ret = TO_NAT(sib_reg);
                         *fixaddress = tmp;
@@ -79,12 +79,24 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
                             break;
                     }
                     MOV64x(ret, tmp);
-                    SCRATCH_USAGE(1);
+                    SCRATCH_USAGE(!IS_GPR(ret));
                 }
             } else {
                 if (sib_reg != 4) {
-                    ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch);
-                    SCRATCH_USAGE(1);
+                    if (!(sib >> 6)) {
+                        ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg));
+                        SCRATCH_USAGE(!IS_GPR(ret));
+                    } else if (rv64_zba) {
+                        SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2));
+                        SCRATCH_USAGE(!IS_GPR(ret));
+                    } else if (rv64_xtheadba) {
+                        TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6);
+                        SCRATCH_USAGE(!IS_GPR(ret));
+                    } else {
+                        SLLI(scratch, TO_NAT(sib_reg), sib >> 6);
+                        ADD(ret, TO_NAT(sib_reg2), scratch);
+                        SCRATCH_USAGE(1);
+                    }
                 } else {
                     ret = TO_NAT(sib_reg2);
                 }
@@ -102,23 +114,24 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
                 SCRATCH_USAGE(1);
             } else if (adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) {
                 ADDI(ret, xRIP, tmp + adj);
-                SCRATCH_USAGE(1);
+                SCRATCH_USAGE(!IS_GPR(ret));
             } else if ((tmp >= -2048) && (tmp <= maxval)) {
                 GETIP(addr + delta, scratch);
                 ADDI(ret, xRIP, tmp);
                 SCRATCH_USAGE(1);
             } else if (tmp + addr + delta < 0x100000000LL) {
                 MOV64x(ret, tmp + addr + delta);
-                SCRATCH_USAGE(1);
+                SCRATCH_USAGE(!IS_GPR(ret));
             } else {
                 if (adj) {
                     MOV64x(ret, tmp + adj);
+                    SCRATCH_USAGE(!IS_GPR(ret));
                 } else {
                     MOV64x(ret, tmp);
                     GETIP(addr + delta, scratch);
+                    SCRATCH_USAGE(1);
                 }
                 ADD(ret, ret, xRIP);
-                SCRATCH_USAGE(1);
             }
             switch (lock) {
                 case 1: addLockAddress(addr + delta + tmp); break;
@@ -146,8 +159,20 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
             *fixaddress = i64;
             if ((nextop & 7) == 4) {
                 if (sib_reg != 4) {
-                    ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch);
-                    SCRATCH_USAGE(1);
+                    if (!(sib >> 6)) {
+                        ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg));
+                        SCRATCH_USAGE(!IS_GPR(ret));
+                    } else if (rv64_zba) {
+                        SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2));
+                        SCRATCH_USAGE(!IS_GPR(ret));
+                    } else if (rv64_xtheadba) {
+                        TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6);
+                        SCRATCH_USAGE(!IS_GPR(ret));
+                    } else {
+                        SLLI(scratch, TO_NAT(sib_reg), sib >> 6);
+                        ADD(ret, TO_NAT(sib_reg2), scratch);
+                        SCRATCH_USAGE(1);
+                    }
                 } else {
                     ret = TO_NAT(sib_reg2);
                 }
@@ -157,30 +182,43 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
             if (i64 >= -2048 && i64 <= 2047) {
                 if ((nextop & 7) == 4) {
                     if (sib_reg != 4) {
-                        ADDSL(scratch, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6, scratch);
+                        if (!(sib >> 6)) {
+                            ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg));
+                            SCRATCH_USAGE(!IS_GPR(ret));
+                        } else if (rv64_zba) {
+                            SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2));
+                            SCRATCH_USAGE(!IS_GPR(ret));
+                        } else if (rv64_xtheadba) {
+                            TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6);
+                            SCRATCH_USAGE(!IS_GPR(ret));
+                        } else {
+                            SLLI(scratch, TO_NAT(sib_reg), sib >> 6);
+                            ADD(ret, TO_NAT(sib_reg2), scratch);
+                            SCRATCH_USAGE(1);
+                        }
+                        ADDI(ret, ret, i64);
                     } else {
-                        scratch = TO_NAT(sib_reg2);
+                        ADDI(ret, TO_NAT(sib_reg2), i64);
+                        SCRATCH_USAGE(!IS_GPR(ret));
                     }
-                } else
-                    scratch = TO_NAT((nextop & 0x07) + (rex.b << 3));
-                ADDI(ret, scratch, i64);
-                SCRATCH_USAGE(1);
+                } else {
+                    ADDI(ret, TO_NAT((nextop & 0x07) + (rex.b << 3)), i64);
+                    SCRATCH_USAGE(!IS_GPR(ret));
+                }
             } else {
                 MOV64x(scratch, i64);
+                SCRATCH_USAGE(1);
                 if ((nextop & 7) == 4) {
                     if (sib_reg != 4) {
                         ADD(scratch, scratch, TO_NAT(sib_reg2));
                         ADDSL(ret, scratch, TO_NAT(sib_reg), sib >> 6, ret);
-                        SCRATCH_USAGE(1);
                     } else {
                         PASS3(int tmp = TO_NAT(sib_reg2));
                         ADD(ret, tmp, scratch);
-                        SCRATCH_USAGE(1);
                     }
                 } else {
                     PASS3(int tmp = TO_NAT((nextop & 0x07) + (rex.b << 3)));
                     ADD(ret, tmp, scratch);
-                    SCRATCH_USAGE(1);
                 }
             }
         }
@@ -226,7 +264,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                     } else {
                         if (sib >> 6) {
                             SLLI(ret, TO_NAT(sib_reg), (sib >> 6));
-                            SCRATCH_USAGE(1);
+                            SCRATCH_USAGE(!IS_GPR(ret));
                         } else
                             ret = TO_NAT(sib_reg);
                         *fixaddress = tmp;
@@ -239,18 +277,19 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                             break;
                     }
                     MOV32w(ret, tmp);
-                    SCRATCH_USAGE(1);
+                    SCRATCH_USAGE(!IS_GPR(ret));
                 }
             } else {
                 if (sib_reg != 4) {
                     if ((sib >> 6)) {
                         SLLI(scratch, TO_NAT(sib_reg), (sib >> 6));
                         ADDW(ret, scratch, TO_NAT(sib_reg2));
+                        SCRATCH_USAGE(1);
                     } else {
                         ADDW(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg));
+                        SCRATCH_USAGE(!IS_GPR(ret));
                     }
                     ZEROUP(ret);
-                    SCRATCH_USAGE(1);
                 } else {
                     ret = TO_NAT(sib_reg2);
                 }
@@ -258,7 +297,7 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
         } else if ((nextop & 7) == 5) {
             uint32_t tmp = F32;
             MOV32w(ret, tmp);
-            SCRATCH_USAGE(1);
+            SCRATCH_USAGE(!IS_GPR(ret));
             switch (lock) {
                 case 1: addLockAddress(tmp); break;
                 case 2:
@@ -291,11 +330,12 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                     if (sib >> 6) {
                         SLLI(scratch, TO_NAT(sib_reg), (sib >> 6));
                         ADDW(ret, scratch, TO_NAT(sib_reg2));
+                        SCRATCH_USAGE(1);
                     } else {
                         ADDW(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg));
+                        SCRATCH_USAGE(!IS_GPR(ret));
                     }
                     ZEROUP(ret);
-                    SCRATCH_USAGE(1);
                 } else {
                     ret = TO_NAT(sib_reg2);
                 }
@@ -311,14 +351,17 @@ static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_
                             ADDW(scratch, scratch, TO_NAT(sib_reg2));
                         } else
                             ADDW(scratch, TO_NAT(sib_reg2), TO_NAT(sib_reg));
+                        SCRATCH_USAGE(1);
                     } else {
                         scratch = TO_NAT(sib_reg2);
+                        SCRATCH_USAGE(!IS_GPR(ret));
                     }
-                } else
+                } else {
                     scratch = TO_NAT(nextop & 0x07);
+                    SCRATCH_USAGE(!IS_GPR(ret));
+                }
                 ADDIW(ret, scratch, i32);
                 ZEROUP(ret);
-                SCRATCH_USAGE(1);
             } else {
                 // no need to zero up, as we did it below
                 rv64_move32(dyn, ninst, scratch, i32, 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 37e8518c..0fd8cf75 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1823,6 +1823,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
 #define B__safe(a, b, c) XOR(xZR, xZR, xZR)
 #define B_(a, b, c)      XOR(xZR, xZR, xZR)
 #define S_(a, b, c)      XOR(xZR, xZR, xZR)
+#define MV_(a, b, c, d)  XOR(xZR, xZR, xZR) /* ignores its four arguments, same expansion as B_ and S_ above */
 
 #define NATIVEJUMP_safe(COND, val) \
     B##COND##_safe(dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2, val);
@@ -1833,6 +1834,9 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
 #define NATIVESET(COND, rd) \
     S##COND(rd, dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2);
 
+#define NATIVEMV(COND, rd, rs) /* pastes COND onto MV; the compared registers are this instruction's nat_flags_op1/op2 */ \
+    MV##COND(rd, rs, dyn->insts[ninst].nat_flags_op1, dyn->insts[ninst].nat_flags_op2);
+
 #define NOTEST(s1)                                     \
     if (BOX64ENV(dynarec_test)) {                      \
         SW(xZR, xEmu, offsetof(x64emu_t, test.test));  \
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 9dd094f7..27ae0427 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -251,6 +251,48 @@
 #define SGTU(rd, rs1, rs2) SLTU(rd, rs2, rs1);
 #define SLEU(rd, rs1, rs2) SGEU(rd, rs2, rs1);
 
+#define MVEQ(rd, rs1, rs2, rs3) /* rd = rs1 when rs2 == rs3, otherwise rd is left unchanged */ \
+    if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \
+        TH_MVEQZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); /* one side is xZR: test the other operand */ \
+    } else {                                                \
+        BNE(rs2, rs3, 8); /* offset 8: presumably hops over the single MV below - confirm MV is one 4-byte insn */ \
+        MV(rd, rs1);                                        \
+    }
+#define MVNE(rd, rs1, rs2, rs3) /* rd = rs1 when rs2 != rs3, otherwise rd is left unchanged */ \
+    if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \
+        TH_MVNEZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); /* one side is xZR: test the other operand */ \
+    } else {                                                \
+        BEQ(rs2, rs3, 8); /* offset 8: presumably hops over the single MV below - confirm MV is one 4-byte insn */ \
+        MV(rd, rs1);                                        \
+    }
+#define MVLT(rd, rs1, rs2, rs3) /* rd = rs1 when rs2 < rs3; inverse branch (BGE) skips the MV */ \
+    BGE(rs2, rs3, 8);           \
+    MV(rd, rs1);
+#define MVGE(rd, rs1, rs2, rs3) /* rd = rs1 when rs2 >= rs3; inverse branch (BLT) skips the MV */ \
+    BLT(rs2, rs3, 8);           \
+    MV(rd, rs1);
+#define MVLTU(rd, rs1, rs2, rs3) /* unsigned form: rd = rs1 when rs2 < rs3; BGEU skips the MV */ \
+    BGEU(rs2, rs3, 8);           \
+    MV(rd, rs1);
+#define MVGEU(rd, rs1, rs2, rs3) /* unsigned form: rd = rs1 when rs2 >= rs3; BLTU skips the MV */ \
+    BLTU(rs2, rs3, 8);           \
+    MV(rd, rs1);
+#define MVGT(rd, rs1, rs2, rs3) /* rd = rs1 when rs2 > rs3 (signed), otherwise rd is left unchanged */ \
+    BGE(rs3, rs2, 8); /* signed skip when rs3 >= rs2; BGEU here would just duplicate MVGTU */ \
+    MV(rd, rs1);
+#define MVLE(rd, rs1, rs2, rs3) /* rd = rs1 when rs2 <= rs3; skipped when rs3 < rs2 (BLT, operands swapped) */ \
+    BLT(rs3, rs2, 8);           \
+    MV(rd, rs1);
+#define MVGTU(rd, rs1, rs2, rs3) /* unsigned form: rd = rs1 when rs2 > rs3; skipped when rs3 >= rs2 */ \
+    BGEU(rs3, rs2, 8);           \
+    MV(rd, rs1);
+#define MVLEU(rd, rs1, rs2, rs3) /* unsigned form: rd = rs1 when rs2 <= rs3; skipped when rs3 < rs2 */ \
+    BLTU(rs3, rs2, 8);           \
+    MV(rd, rs1);
+
+#define MVEQZ(rd, rs1, rs2) MVEQ(rd, rs1, rs2, xZR) /* rd = rs1 when rs2 equals xZR */
+#define MVNEZ(rd, rs1, rs2) MVNE(rd, rs1, rs2, xZR) /* rd = rs1 when rs2 differs from xZR */
+
 #define BEQ_safe(rs1, rs2, imm)              \
     if ((imm) > -0x1000 && (imm) < 0x1000) { \
         BEQ(rs1, rs2, imm);                  \