about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-08-28 02:23:42 +0800
committer GitHub <noreply@github.com> 2024-08-27 20:23:42 +0200
commit 875a2ef3a9c943edbf497088df72e5fbcacd8728 (patch)
tree 46622576593264affa330da7eb72566ba51b6fbf
parent bfbf18688beec811b9e936908db12e0ca1671430 (diff)
download box64-875a2ef3a9c943edbf497088df72e5fbcacd8728.tar.gz
download box64-875a2ef3a9c943edbf497088df72e5fbcacd8728.zip
[RV64_DYNAREC] Added a fast path for some 16bit opcodes (#1765)
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c 112
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 19
2 files changed, 73 insertions, 58 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index a90bdb77..5ad815af 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -63,6 +63,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADD Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, ADD(ed, ed, x1));
             GETGW(x2);
             GETEW(x1, 0);
             emit_add16(dyn, ninst, x1, x2, x4, x5, x6);
@@ -72,6 +73,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADD Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, ADD(gd, gd, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_add16(dyn, ninst, x1, x2, x5, x4, x6);
@@ -101,6 +103,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("OR Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, OR(ed, ed, x1));
             GETGW(x2);
             GETEW(x1, 0);
             emit_or16(dyn, ninst, x1, x2, x4, x5);
@@ -110,6 +113,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("OR Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, OR(gd, gd, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_or16(dyn, ninst, x1, x2, x4, x5);
@@ -143,6 +147,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, {
+                ADD(ed, ed, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                ADD(ed, ed, x2);
+            });
             GETGW(x2);
             GETEW(x1, 0);
             emit_adc16(dyn, ninst, x1, x2, x4, x6, x5);
@@ -153,6 +163,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, {
+                ADD(gd, gd, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                ADD(gd, gd, x2);
+            });
             GETGW(x1);
             GETEW(x2, 0);
             emit_adc16(dyn, ninst, x1, x2, x4, x6, x5);
@@ -173,6 +189,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, {
+                SUB(ed, ed, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                SUB(ed, ed, x2);
+            });
             GETGW(x2);
             GETEW(x1, 0);
             emit_sbb16(dyn, ninst, x1, x2, x4, x5, x6);
@@ -183,6 +205,12 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, {
+                SUB(gd, gd, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 16);
+                SUB(gd, gd, x2);
+            });
             GETGW(x1);
             GETEW(x2, 0);
             emit_sbb16(dyn, ninst, x1, x2, x6, x4, x5);
@@ -240,6 +268,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SUB Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(ed, gd, x1, SUB(ed, ed, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_sub16(dyn, ninst, x2, x1, x4, x5, x6);
@@ -249,6 +278,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SUB Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_16BIT_OPERATION(gd, ed, x1, SUB(gd, gd, x1));
             GETGW(x1);
             GETEW(x2, 0);
             emit_sub16(dyn, ninst, x1, x2, x6, x4, x5);
@@ -267,73 +297,39 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("XOR Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            // try to determine ed and gd
-            ed = 0;
-            GETGD;
-            if (MODREG) {
-                GETED(0);
-            }
-            if (ed == gd) {
-                // optimize XOR Gw, Gw
-                CLEAR_FLAGS();
-                IFX(X_PEND) {
-                    SET_DF(x6, d_xor16);
-                } else IFX(X_ALL) {
-                    SET_DFNONE();
-                }
-                SRLI(ed, ed, 16);
-                SLLI(ed, ed, 16);
-                IFX(X_PEND) {
-                    SH(ed, xEmu, offsetof(x64emu_t, res));
-                }
-                IFX(X_ZF) {
-                    ORI(xFlags, xFlags, 1 << F_ZF);
-                }
-                IFX(X_PF) {
-                    ORI(xFlags, xFlags, 1 << F_PF);
+            if (MODREG && !dyn->insts[ninst].x64.gen_flags) {
+                gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3);
+                ed = xRAX + (nextop & 7) + (rex.b << 3);
+                if (ed == gd) {
+                    SRLI(ed, ed, 16);
+                    SLLI(ed, ed, 16);
+                    break;
                 }
-            } else {
-                GETGW(x2);
-                GETEW(x1, 0);
-                emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
-                EWBACK;
             }
+            FAST_16BIT_OPERATION(ed, gd, x1, XOR(ed, ed, x1));
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
+            EWBACK;
             break;
         case 0x33:
             INST_NAME("XOR Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            // try to determine ed and gd
-            ed = 0;
-            GETGD;
-            if (MODREG) {
-                GETED(0);
-            }
-            if (ed == gd) {
-                // optimize XOR Gw, Gw
-                CLEAR_FLAGS();
-                IFX(X_PEND) {
-                    SET_DF(x6, d_xor16);
-                } else IFX(X_ALL) {
-                    SET_DFNONE();
-                }
-                SRLI(ed, ed, 16);
-                SLLI(ed, ed, 16);
-                IFX(X_PEND) {
-                    SH(ed, xEmu, offsetof(x64emu_t, res));
-                }
-                IFX(X_ZF) {
-                    ORI(xFlags, xFlags, 1 << F_ZF);
-                }
-                IFX(X_PF) {
-                    ORI(xFlags, xFlags, 1 << F_PF);
+            if (MODREG && !dyn->insts[ninst].x64.gen_flags) {
+                gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3);
+                ed = xRAX + (nextop & 7) + (rex.b << 3);
+                if (ed == gd) {
+                    SRLI(gd, gd, 16);
+                    SLLI(gd, gd, 16);
+                    break;
                 }
-            } else {
-                GETGW(x1);
-                GETEW(x2, 0);
-                emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
-                GWBACK;
             }
+            FAST_16BIT_OPERATION(gd, ed, x1, XOR(gd, gd, x1));
+            GETGW(x1);
+            GETEW(x2, 0);
+            emit_xor16(dyn, ninst, x1, x2, x4, x5, x6);
+            GWBACK;
             break;
         case 0x35:
             INST_NAME("XOR AX, Iw");
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 200d7f11..b2579454 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1774,4 +1774,23 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         break;                                                                       \
     }
 
+#define FAST_16BIT_OPERATION(dst, src, s1, OP) /* 16-bit reg-reg fast path: rotate dst, apply OP, rotate back. Expands to a bare if-block that ends in break. */ \
+    if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { /* taken only when MODREG holds, Zbb or XTheadBb is available, and gen_flags is zero for this instruction */ \
+        gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); /* register selected by nextop bits 5:3 plus rex.r, as an offset from xRAX */ \
+        ed = xRAX + (nextop & 7) + (rex.b << 3); /* register selected by nextop bits 2:0 plus rex.b, as an offset from xRAX */ \
+        SLLI(s1, src, 64 - 16); /* s1 = src shifted left by 48, so src's low 16 bits occupy bits 63:48 and the rest is zero */ \
+        if (rv64_zbb) { \
+            RORI(dst, dst, 16); /* rotate right by 16: dst's low 16 bits move to bits 63:48 */ \
+        } else { \
+            TH_SRRI(dst, dst, 16); /* presumably the XTheadBb rotate-right equivalent of RORI - confirm against the emitter definition */ \
+        } \
+        OP; /* caller-supplied emitter(s); the call sites in this patch combine dst with s1, and the ADC/SBB ones also use x2 as scratch */ \
+        if (rv64_zbb) { \
+            RORI(dst, dst, 64 - 16); /* rotate right by 48, undoing the earlier rotate by 16 */ \
+        } else { \
+            TH_SRRI(dst, dst, 64 - 16); /* same restore on the XTheadBb path */ \
+        } \
+        break; /* leaves the enclosing switch/loop at the expansion site, skipping the code that follows the macro call */ \
+    }
+
 #endif //__DYNAREC_RV64_HELPER_H__