about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-05-23 22:33:02 +0800
committer GitHub <noreply@github.com> 2025-05-23 16:33:02 +0200
commit b7ddb92c80b45d5945a3339f8f7a36ee787e0901 (patch)
tree 61cdd68bc5cae881ea13c4e2e60e15e2f0bd0d1c /src
parent d40f51bdc53e07972a03284e0521d001035c996a (diff)
download box64-b7ddb92c80b45d5945a3339f8f7a36ee787e0901.tar.gz
box64-b7ddb92c80b45d5945a3339f8f7a36ee787e0901.zip
[RV64_DYNAREC] Optimized CLZ macro with xtheadbb (#2664)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 4
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 84
2 files changed, 48 insertions, 40 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index bd40bb4b..4ecf1d7d 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -1426,8 +1426,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
-            CLZxw(gd, ed, 0, x1, x2, x6);
-            ADDI(x1, xZR, rex.w ? 63 : 31);
+            CLZxw(gd, ed, 1, x1, x2, x6);
+            ADDI(x1, xZR, 63);
             SUB(gd, x1, gd);
             GWBACK;
             break;
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 38cda033..9dd094f7 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -851,44 +851,52 @@
 // Count leading zero bits in word
 #define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011))
 // Count leading zero bits
-#define CLZxw(rd, rs, x, s1, s2, s3)         \
-    if (rv64_zbb) {                          \
-        if (x)                               \
-            CLZ(rd, rs);                     \
-        else                                 \
-            CLZW(rd, rs);                    \
-    } else {                                 \
-        if (rs != rd)                        \
-            u8 = rd;                         \
-        else                                 \
-            u8 = s1;                         \
-        ADDI(u8, xZR, x ? 63 : 31);          \
-        if (x) {                             \
-            MV(s2, rs);                      \
-            SRLI(s3, s2, 32);                \
-            BEQZ(s3, 4 + 2 * 4);             \
-            SUBI(u8, u8, 32);                \
-            MV(s2, s3);                      \
-        } else {                             \
-            ZEXTW2(s2, rs);                  \
-        }                                    \
-        SRLI(s3, s2, 16);                    \
-        BEQZ(s3, 4 + 2 * 4);                 \
-        SUBI(u8, u8, 16);                    \
-        MV(s2, s3);                          \
-        SRLI(s3, s2, 8);                     \
-        BEQZ(s3, 4 + 2 * 4);                 \
-        SUBI(u8, u8, 8);                     \
-        MV(s2, s3);                          \
-        SRLI(s3, s2, 4);                     \
-        BEQZ(s3, 4 + 2 * 4);                 \
-        SUBI(u8, u8, 4);                     \
-        MV(s2, s3);                          \
-        ANDI(s2, s2, 0b1111);                \
-        TABLE64(s3, (uintptr_t) & lead0tab); \
-        ADD(s3, s3, s2);                     \
-        LBU(s2, s3, 0);                      \
-        SUB(rd, u8, s2);                     \
+#define CLZxw(rd, rs, x, s1, s2, s3)       \
+    if (rv64_zbb) {                        \
+        if (x)                             \
+            CLZ(rd, rs);                   \
+        else                               \
+            CLZW(rd, rs);                  \
+    } else if (rv64_xtheadbb) {            \
+        if (x) {                           \
+            TH_FF1(rd, rs);                \
+        } else {                           \
+            ZEXTW2(rd, rs);                \
+            TH_FF1(rd, rd);                \
+            SUBI(rd, rd, 32);              \
+        }                                  \
+    } else {                               \
+        if (rs != rd)                      \
+            u8 = rd;                       \
+        else                               \
+            u8 = s1;                       \
+        ADDI(u8, xZR, x ? 63 : 31);        \
+        if (x) {                           \
+            MV(s2, rs);                    \
+            SRLI(s3, s2, 32);              \
+            BEQZ(s3, 4 + 2 * 4);           \
+            SUBI(u8, u8, 32);              \
+            MV(s2, s3);                    \
+        } else {                           \
+            ZEXTW2(s2, rs);                \
+        }                                  \
+        SRLI(s3, s2, 16);                  \
+        BEQZ(s3, 4 + 2 * 4);               \
+        SUBI(u8, u8, 16);                  \
+        MV(s2, s3);                        \
+        SRLI(s3, s2, 8);                   \
+        BEQZ(s3, 4 + 2 * 4);               \
+        SUBI(u8, u8, 8);                   \
+        MV(s2, s3);                        \
+        SRLI(s3, s2, 4);                   \
+        BEQZ(s3, 4 + 2 * 4);               \
+        SUBI(u8, u8, 4);                   \
+        MV(s2, s3);                        \
+        ANDI(s2, s2, 0b1111);              \
+        TABLE64(s3, (uintptr_t)&lead0tab); \
+        ADD(s3, s3, s2);                   \
+        LBU(s2, s3, 0);                    \
+        SUB(rd, u8, s2);                   \
     }
 
 // Count trailing zero bits