about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2023-09-27 03:45:33 +0800
committer GitHub <noreply@github.com> 2023-09-26 21:45:33 +0200
commit 684160f17847b3220ea49a7a4c0e214ce15ffb8d (patch)
tree 3e276a8d9f6a7f3038fe1ee0911878e6bf62ce85 /src
parent e3f193b3824cb46a147414b2efdcbcee6715abdb (diff)
download box64-684160f17847b3220ea49a7a4c0e214ce15ffb8d.tar.gz
box64-684160f17847b3220ea49a7a4c0e214ce15ffb8d.zip
[RV64_DYNAREC] Added more support for XTheadBs extension (#993)
* [RV64_DYNAREC] Added more support for XTheadBs extension

* Revert: FF0 is not CLZ
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 34
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 2
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 45
3 files changed, 39 insertions, 42 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 227ffda5..d7e4eeb8 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1009,9 +1009,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 LDxw(x1, x3, fixedaddress);
                 ed = x1;
             }
-            ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1);
+            BEXT(x4, ed, gd, x2);
             ANDI(xFlags, xFlags, ~1); // F_CF is 1
             OR(xFlags, xFlags, x4);
             break;
@@ -1043,13 +1041,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ed = x1;
                 wback = x3;
             }
-            if (rex.w) {
-                ANDI(x2, gd, 0x3f);
-            } else {
-                ANDI(x2, gd, 0x1f);
-            }
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1); // F_CF is 1
+            BEXT(x4, ed, gd, x2);
             ANDI(xFlags, xFlags, ~1);
             OR(xFlags, xFlags, x4);
             ADDI(x3, xZR, 1);
@@ -1194,13 +1186,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ed = x1;
                 wback = x3;
             }
-            if (rex.w) {
-                ANDI(x2, gd, 0x3f);
-            } else {
-                ANDI(x2, gd, 0x1f);
-            }
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1); // F_CF is 1
+            BEXT(x4, ed, gd, x2); // F_CF is 1
             ANDI(xFlags, xFlags, ~1);
             OR(xFlags, xFlags, x4);
             ADDI(x5, xZR, 1);
@@ -1260,8 +1246,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETED(1);
                     u8 = F8;
                     u8 &= rex.w ? 0x3f : 0x1f;
-                    SRLIxw(x3, ed, u8);
-                    ANDI(x3, x3, 1); // F_CF is 1
+                    BEXTI(x3, ed, u8); // F_CF is 1
                     ANDI(xFlags, xFlags, ~1);
                     OR(xFlags, xFlags, x3);
                     break;
@@ -1326,8 +1311,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETED(1);
                     u8 = F8;
                     u8 &= rex.w ? 0x3f : 0x1f;
-                    SRLIxw(x3, ed, u8);
-                    ANDI(x3, x3, 1); // F_CF is 1
+                    BEXTI(x3, ed, u8); // F_CF is 1
                     ANDI(xFlags, xFlags, ~1);
                     OR(xFlags, xFlags, x3);
                     if (u8 <= 10) {
@@ -1363,13 +1347,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ed = x1;
                 wback = x3;
             }
-            if (rex.w) {
-                ANDI(x2, gd, 0x3f);
-            } else {
-                ANDI(x2, gd, 0x1f);
-            }
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1); // F_CF is 1
+            BEXT(x4, ed, gd, x2); // F_CF is 1
             ANDI(xFlags, xFlags, ~1);
             OR(xFlags, xFlags, x4);
             ADDI(x3, xZR, 1);
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 9007e46e..1b57e9ed 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -416,8 +416,6 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MARK;
             if (rv64_zbb) {
                 CLZxw(gd, ed);
-            } else if (rv64_xtheadbb) {
-                TH_FF0(gd, ed);
             } else {
                 if (ed != gd)
                     u8 = gd;
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 63e62ad1..e7608781 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -841,10 +841,10 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define BCLR(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b001, rd, 0b0110011))
 // Single-bit Clear (Immediate)
 #define BCLI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b001, rd, 0b0010011))
-// Single-bit Extreact (Register)
-#define BEXT(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011))
+// Single-bit Extract (Register)
+#define BEXT_(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011))
 // Single-bit Extract (Immediate)
-#define BEXTI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011))
+#define BEXTI_(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011))
 // Single-bit Invert (Register)
 #define BINV(rd, rs1, rs2) EMIT(R_type(0b0110100, rs2, rs1, 0b001, rd, 0b0110011))
 // Single-bit Invert (Immediate)
@@ -854,6 +854,27 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Single-bit Set (Immediate)
 #define BSETI(rd, rs1, imm) EMIT(R_type(0b0010100, imm, rs1, 0b001, rd, 0b0010011))
 
+// Single-bit Extract (Register), s0 can be the same as rs2
+#define BEXT(rd, rs1, rs2, s0)              \
+    if (rv64_zbs)                           \
+        BEXT_(rd, rs1, rs2);                \
+    else {                                  \
+        ANDI(s0, rs2, rex.w ? 0x3f : 0x1f); \
+        SRL(rd, rs1, s0);                   \
+        ANDI(rd, rd, 1);                    \
+    }
+
+// Single-bit Extract (Immediate)
+#define BEXTI(rd, rs1, imm)   \
+    if (rv64_zbs)             \
+        BEXTI_(rd, rs1, imm); \
+    else if (rv64_xtheadbs)   \
+        TH_TST(rd, rs1, imm); \
+    else {                    \
+        SRLIxw(rd, rs1, imm); \
+        ANDI(rd, rd, 1);      \
+    }
+
 /// THead vendor extension
 /// https://github.com/T-head-Semi/thead-extension-spec/releases
 
@@ -861,7 +882,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 
 // Add a shifted operand to a second operand.
 // reg[rd] := reg[rs1] + (reg[rs2] << imm2)
-#define TH_ADDSL(rd, rs1, rs2, imm2) EMIT(R_type(imm2 & 0b11, rs2, rs1, 0b001, rd, 0b0001011))
+#define TH_ADDSL(rd, rs1, rs2, imm2) EMIT(R_type((imm2)&0b11, rs2, rs1, 0b001, rd, 0b0001011))
 
 // XTheadBb - Basic bit-manipulation
 
@@ -874,20 +895,20 @@ f28–31  ft8–11  FP temporaries                  Caller
 
 // Perform a cyclic right shift.
 // reg[rd] := (reg[rs1] >> imm6) | (reg[rs1] << (xlen - imm6))
-#define TH_SRRI(rd, rs1, imm6) EMIT(I_type(0b000100000000 | (imm6 & 0x3f), rs1, 0b001, rd, 0b0001011))
+#define TH_SRRI(rd, rs1, imm6) EMIT(I_type(0b000100000000 | ((imm6)&0x3f), rs1, 0b001, rd, 0b0001011))
 
 // Perform a cyclic right shift on word operand.
 // data := zext.w(reg[rs1])
 // reg[rd] := (data >> imm5) | (data << (32 - imm5))
-#define TH_SRRIW(rd, rs1, imm5) EMIT(I_type(0b000101000000 | (imm5 & 0x1f), rs1, 0b001, rd, 0b0001011))
+#define TH_SRRIW(rd, rs1, imm5) EMIT(I_type(0b000101000000 | ((imm5)&0x1f), rs1, 0b001, rd, 0b0001011))
 
 // Extract and sign-extend bits.
 // reg[rd] := sign_extend(reg[rs1][imm1:imm2])
-#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type(((imm1 & 0x1f) << 6) | (imm2 & 0x1f), rs1, 0b010, rd, 0b0001011))
+#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type((((imm1)&0x1f) << 6) | ((imm2)&0x1f), rs1, 0b010, rd, 0b0001011))
 
 // Extract and zero-extend bits.
 // reg[rd] := zero_extend(reg[rs1][imm1:imm2])
-#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type(((imm1 & 0x1f) << 6) | (imm2 & 0x1f), rs1, 0b011, rd, 0b0001011))
+#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type((((imm1)&0x1f) << 6) | ((imm2)&0x1f), rs1, 0b011, rd, 0b0001011))
 
 // Find first '0'-bit
 // for i=xlen..0:
@@ -932,7 +953,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 //   rd := 1
 // else
 //   rd := 0
-#define TH_TST(rd, rs1, imm6) EMIT(I_type(0b100010000000 | (imm6 & 0x3f), rs1, 0b001, rd, 0b0001011))
+#define TH_TST(rd, rs1, imm6) EMIT(I_type(0b100010000000 | ((imm6)&0x3f), rs1, 0b001, rd, 0b0001011))
 
 
 // XTheadCondMov -  Conditional move
@@ -952,7 +973,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Load indexed byte, increment address after loading.
 // rd := sign_extend(mem[rs1])
 // rs1 := rs1 + (sign_extend(imm5) << imm2)
-#define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000 | ((imm2 & 0b11) << 5) | (imm5 & 0x1f), rs1, 0b100, rd, 0b0001011))
+#define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000 | (((imm2)&0b11) << 5) | ((imm5)&0x1f), rs1, 0b100, rd, 0b0001011))
 
 // TODO
 // th.lbib rd, (rs1), imm5, imm2 Load indexed byte
@@ -1006,7 +1027,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 // addr := rs1 + (zero_extend(imm2) << 4)
 // rd1 := mem[addr+7:addr]
 // rd2 := mem[addr+15:addr+8]
-#define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | (imm2 & 0b11), rd2, rs1, 0b100, rd1, 0b0001011))
+#define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | ((imm2)&0b11), rd2, rs1, 0b100, rd1, 0b0001011))
 
 // TODO
 // th.lwd rd1, rd2, (rs1), imm2, 3 Load two signed 32-bit values
@@ -1019,7 +1040,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Load indexed double-precision floating point value.
 // addr := rs1 + (rs2 << imm2)
 // rd := fmem[addr+7:addr]
-#define TH_FLRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | (imm2 & 0b11), rs2, rs1, 0b110, rd, 0b0001011))
+#define TH_FLRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | ((imm2)&0b11), rs2, rs1, 0b110, rd, 0b0001011))
 
 // TODO
 // th.flrw rd, rs1, rs2, imm2 Load indexed float