about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-08-28 00:55:11 +0800
committer GitHub <noreply@github.com> 2024-08-27 18:55:11 +0200
commit 9de694c46bcd665ea6a91cce848d49144e6cec2f (patch)
tree b21434cbf5c1854e39dcf8bb19685c859eabeb7f
parent 220c5ee47d4b8f57cbbd30827e74c1b62f40ead6 (diff)
download box64-9de694c46bcd665ea6a91cce848d49144e6cec2f.tar.gz
box64-9de694c46bcd665ea6a91cce848d49144e6cec2f.zip
[RV64_DYNAREC] Added a fast path for some 8bit opcodes (#1763)
* [RV64_DYNAREC] Added a fast path for some 8bit opcodes

* fix

* more

* more
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_0.c 34
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_67.c 12
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 31
3 files changed, 75 insertions, 2 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c
index 24eabbb9..cb50df7f 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_0.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_0.c
@@ -33,7 +33,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     int32_t i32, tmp;
     int64_t i64, j64;
     uint8_t u8;
-    uint8_t gb1, gb2, eb1, eb2;
+    uint8_t gb, gb1, gb2, eb1, eb2;
     uint32_t u32;
     uint64_t u64;
     uint8_t wback, wb1, wb2, wb;
@@ -54,6 +54,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("ADD Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(wb, gb, x1, ADD(wb, wb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_add8(dyn, ninst, x1, x2, x4, x5);
@@ -72,6 +73,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("ADD Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_add8(dyn, ninst, x2, x1, x4, x5);
@@ -123,6 +125,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("OR Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(wb, gb, x1, OR(wb, wb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_or8(dyn, ninst, x1, x2, x4, x5);
@@ -141,6 +144,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("OR Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, OR(gb, gb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_or8(dyn, ninst, x2, x1, x4, x5);
@@ -190,6 +194,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(wb, gb, x1, {
+                ADD(wb, wb, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 8);
+                ADD(wb, wb, x2);
+            });
             GETEB(x1, 0);
             GETGB(x2);
             emit_adc8(dyn, ninst, x1, x2, x4, x5, x6);
@@ -210,6 +220,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, {
+                ADD(gb, gb, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 8);
+                ADD(gb, gb, x2);
+            });
             GETEB(x2, 0);
             GETGB(x1);
             emit_adc8(dyn, ninst, x1, x2, x4, x6, x5);
@@ -268,6 +284,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(wb, gb, x1, {
+                SUB(wb, wb, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 8);
+                SUB(wb, wb, x2);
+            });
             GETEB(x1, 0);
             GETGB(x2);
             emit_sbb8(dyn, ninst, x1, x2, x4, x5, x6);
@@ -288,6 +310,12 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, {
+                SUB(gb, gb, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 8);
+                SUB(gb, gb, x2);
+            });
             GETEB(x2, 0);
             GETGB(x1);
             emit_sbb8(dyn, ninst, x1, x2, x6, x4, x5);
@@ -395,6 +423,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("SUB Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(wb, gb, x1, SUB(wb, wb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_sub8(dyn, ninst, x1, x2, x4, x5, x6);
@@ -413,6 +442,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("SUB Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, SUB(gb, gb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_sub8(dyn, ninst, x2, x1, x4, x5, x6);
@@ -445,6 +475,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("XOR Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(wb, gb, x1, XOR(wb, wb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_xor8(dyn, ninst, x1, x2, x4, x5);
@@ -465,6 +496,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("XOR Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, XOR(gb, gb, x1));
             GETEB(x1, 0);
             GETGB(x2);
             emit_xor8(dyn, ninst, x2, x1, x4, x5);
diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c
index f1dd0549..59fc507a 100644
--- a/src/dynarec/rv64/dynarec_rv64_67.c
+++ b/src/dynarec/rv64/dynarec_rv64_67.c
@@ -28,7 +28,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     uint8_t opcode = F8;
     uint8_t nextop;
-    uint8_t gd, ed, wback, wb, wb1, wb2, gb1, gb2, eb1, eb2;
+    uint8_t gd, ed, wback, wb, wb1, wb2, gb, gb1, gb2, eb1, eb2;
     int64_t fixedaddress;
     int unscaled;
     int8_t  i8;
@@ -75,6 +75,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADD Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1));
             GETEB32(x2, 0);
             GETGB(x1);
             emit_add8(dyn, ninst, x1, x2, x3, x4);
@@ -109,6 +110,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("OR Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, OR(gb, gb, x1));
             GETEB32(x2, 0);
             GETGB(x1);
             emit_or8(dyn, ninst, x1, x2, x3, x4);
@@ -268,6 +270,12 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             READFLAGS(X_CF);
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, {
+                SUB(gb, gb, x1);
+                ANDI(x2, xFlags, 1 << F_CF);
+                SLLI(x2, x2, 64 - 8);
+                SUB(gb, gb, x2);
+            });
             GETEB32(x2, 0);
             GETGB(x1);
             emit_sbb8(dyn, ninst, x1, x2, x3, x4, x5);
@@ -339,6 +347,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SUB Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, SUB(gb, gb, x1));
             GETEB32(x2, 0);
             GETGB(x1);
             emit_sub8(dyn, ninst, x1, x2, x3, x4, x5);
@@ -373,6 +382,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("XOR Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
+            FAST_8BIT_OPERATION(gb, wb, x1, XOR(gb, gb, x1));
             GETEB32(x2, 0);
             GETGB(x1);
             emit_xor8(dyn, ninst, x1, x2, x3, x4);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index e1082c1f..200d7f11 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1743,4 +1743,35 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     BLT(reg, s, 4 + 4);           \
     ADDIW(reg, s, -1);
 
+#define FAST_8BIT_OPERATION(dst, src, s1, OP) /* Fast path for a register-to-register 8-bit op done in place on the full host register. dst/src must be the literal tokens wb or gb (token pasting turns them into wb2/gb2); s1 is a scratch register; OP is the caller's emitter statement(s) combining dst with s1. Needs nextop, rex, dyn, ninst, wb, wb2, gb, gb2 and gd in the caller's scope. */ \
+    if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { /* taken only when MODREG holds, a rotate-capable extension flag is set, and gen_flags is 0 for this instruction */ \
+        if (rex.rex) { /* REX form: register number is extended by rex.b / rex.r and both byte offsets are 0 */ \
+            wb = xRAX + (nextop & 7) + (rex.b << 3); /* E operand: low 3 bits of nextop */ \
+            wb2 = 0;                                                                 \
+            gb = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); /* G operand: bits 3..5 of nextop */ \
+            gb2 = 0;                                                                 \
+        } else { /* no REX: encodings 4..7 select bit offset 8 of registers 0..3 */ \
+            wb = (nextop & 7);                                                       \
+            wb2 = (wb >> 2) * 8; /* 0 or 8: bit offset of the E byte inside its register */ \
+            wb = xRAX + (wb & 3);                                                    \
+            gd = (nextop & 0x38) >> 3; /* note: gd is overwritten here as a temporary */ \
+            gb2 = ((gd & 4) >> 2) * 8; /* 0 or 8: bit offset of the G byte inside its register */ \
+            gb = xRAX + (gd & 3);                                                    \
+        }                                                                            \
+        if (src##2) { ANDI(s1, src, 0xf00); } /* NOTE(review): presumably the 12-bit immediate is sign-extended, so this clears bits 0..7 and keeps the offset-8 byte - confirm against the ANDI encoder */ \
+        SLLI(s1, (src##2 ? s1 : src), 64 - src##2 - 8); /* shift by 56 or 48 so the source byte lands in bits 56..63 of s1 */ \
+        if (rv64_zbb) {                                                              \
+            RORI(dst, dst, 8 + dst##2); /* rotate right by 8 or 16 so the destination byte lands in bits 56..63 */ \
+        } else {                                                                     \
+            TH_SRRI(dst, dst, 8 + dst##2); /* same rotation, emitted on the non-zbb (rv64_xtheadbb) path */ \
+        }                                                                            \
+        OP; /* caller-supplied operation on dst and s1 */ \
+        if (rv64_zbb) {                                                              \
+            RORI(dst, dst, 64 - 8 - dst##2); /* the two rotate amounts sum to 64, putting every bit back in place */ \
+        } else {                                                                     \
+            TH_SRRI(dst, dst, 64 - 8 - dst##2);                                      \
+        }                                                                            \
+        break; /* leaves the enclosing case at the call site, so the statements following the macro are not reached */ \
+    } /* when the condition is false the macro emits nothing and execution falls through to the code after it */
+
 #endif //__DYNAREC_RV64_HELPER_H__