about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2023-05-01 11:36:02 +0000
committer: ptitSeb <sebastien.chev@gmail.com> 2023-05-01 11:36:02 +0000
commit: 505ac0021dd69d56f5467302b6514c8435caee82 (patch)
tree: 7d1dda73bdb2a289d1f6e03c49137041cd351b30 /src
parent: c020154a2f3e6bbf8e566395bacc2190770269a1 (diff)
download: box64-505ac0021dd69d56f5467302b6514c8435caee82.tar.gz
download: box64-505ac0021dd69d56f5467302b6514c8435caee82.zip
[RV64_DYNAREC] Added Zbb path for 0F BC/BD and 66 0F BC/BD opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 82
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 85
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 22
3 files changed, 105 insertions, 84 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 3c6b4567..e5e44a12 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1058,14 +1058,18 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             ORI(xFlags, xFlags, 1<<F_ZF);
             B_NEXT_nocond;
             MARK;
-            NEG(x2, ed);
-            AND(x2, x2, ed);
-            TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-            MUL(x2, x2, x3);
-            SRLI(x2, x2, 64-6);
-            TABLE64(x1, (uintptr_t)&deBruijn64tab);
-            ADD(x1, x1, x2);
-            LBU(gd, x1, 0);
+            if(rv64_zbb) {
+                CTZxw(gd, ed);
+            } else {
+                NEG(x2, ed);
+                AND(x2, x2, ed);
+                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
+                MUL(x2, x2, x3);
+                SRLI(x2, x2, 64-6);
+                TABLE64(x1, (uintptr_t)&deBruijn64tab);
+                ADD(x1, x1, x2);
+                LBU(gd, x1, 0);
+            }
             ANDI(xFlags, xFlags, ~(1<<F_ZF));
             break;
         case 0xBD:
@@ -1084,37 +1088,43 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1<<F_ZF));
-            if(ed!=gd)
-                u8 = gd;
-            else
-                u8 = x1;
-            ADDI(u8, xZR, 0);
-            if(rex.w) {
-                MV(x2, ed);
-                SRLI(x3, x2, 32);
+            if(rv64_zbb) {
+                MOV32w(x1, rex.w?63:31);
+                CLZxw(gd, ed);
+                SUB(gd, x1, gd);
+            } else {
+                if(ed!=gd)
+                    u8 = gd;
+                else
+                    u8 = x1;
+                ADDI(u8, xZR, 0);
+                if(rex.w) {
+                    MV(x2, ed);
+                    SRLI(x3, x2, 32);
+                    BEQZ(x3, 4+2*4);
+                    ADDI(u8, u8, 32);
+                    MV(x2, x3);
+                } else {
+                    AND(x2, ed, xMASK);
+                }
+                SRLI(x3, x2, 16);
                 BEQZ(x3, 4+2*4);
-                ADDI(u8, u8, 32);
+                ADDI(u8, u8, 16);
                 MV(x2, x3);
-            } else {
-                AND(x2, ed, xMASK);
+                SRLI(x3, x2, 8);
+                BEQZ(x3, 4+2*4);
+                ADDI(u8, u8, 8);
+                MV(x2, x3);
+                SRLI(x3, x2, 4);
+                BEQZ(x3, 4+2*4);
+                ADDI(u8, u8, 4);
+                MV(x2, x3);
+                ANDI(x2, x2, 0b1111); 
+                TABLE64(x3, (uintptr_t)&lead0tab);
+                ADD(x3, x3, x2);
+                LBU(x2, x3, 0);
+                ADD(gd, u8, x2);
             }
-            SRLI(x3, x2, 16);
-            BEQZ(x3, 4+2*4);
-            ADDI(u8, u8, 16);
-            MV(x2, x3);
-            SRLI(x3, x2, 8);
-            BEQZ(x3, 4+2*4);
-            ADDI(u8, u8, 8);
-            MV(x2, x3);
-            SRLI(x3, x2, 4);
-            BEQZ(x3, 4+2*4);
-            ADDI(u8, u8, 4);
-            MV(x2, x3);
-            ANDI(x2, x2, 0b1111); 
-            TABLE64(x3, (uintptr_t)&lead0tab);
-            ADD(x3, x3, x2);
-            LBU(x2, x3, 0);
-            ADD(gd, u8, x2);
             break;
         case 0xBE:
             INST_NAME("MOVSX Gd, Eb");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 489d5ca0..77b8bf2d 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -284,21 +284,24 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 AND(x4, ed, xMASK);
                 ed = x4;
             }
-            BNE_MARK(ed, xZR);
             ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
+            BNE_MARK(ed, xZR);
             ORI(xFlags, xFlags, 1<<F_CF);
             MOV32w(gd, rex.w?64:32);
             B_NEXT_nocond;
             MARK;
-            NEG(x2, ed);
-            AND(x2, x2, ed);
-            TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-            MUL(x2, x2, x3);
-            SRLI(x2, x2, 64-6);
-            TABLE64(x1, (uintptr_t)&deBruijn64tab);
-            ADD(x1, x1, x2);
-            LBU(gd, x1, 0);
-            ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
+            if(rv64_zbb) {
+                CTZxw(gd, ed);
+            } else {
+                NEG(x2, ed);
+                AND(x2, x2, ed);
+                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
+                MUL(x2, x2, x3);
+                SRLI(x2, x2, 64-6);
+                TABLE64(x1, (uintptr_t)&deBruijn64tab);
+                ADD(x1, x1, x2);
+                LBU(gd, x1, 0);
+            }
             BNE(gd, xZR, 4+4);
             ORI(xFlags, xFlags, 1<<F_ZF);
             break;
@@ -319,38 +322,42 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             ORI(xFlags, xFlags, 1<<F_CF);
             B_NEXT_nocond;
             MARK;
-            if(ed!=gd)
-                u8 = gd;
-            else
-                u8 = x1;
-            ADDI(u8, xZR, rex.w?63:31);
-            if(rex.w) {
-                MV(x2, ed);
-                SRLI(x3, x2, 32);
+            if(rv64_zbb) {
+                CLZxw(gd, ed);
+            } else {
+                if(ed!=gd)
+                    u8 = gd;
+                else
+                    u8 = x1;
+                ADDI(u8, xZR, rex.w?63:31);
+                if(rex.w) {
+                    MV(x2, ed);
+                    SRLI(x3, x2, 32);
+                    BEQZ(x3, 4+2*4);
+                    SUBI(u8, u8, 32);
+                    MV(x2, x3);
+                } else {
+                    AND(x2, ed, xMASK);
+                }
+                SRLI(x3, x2, 16);
                 BEQZ(x3, 4+2*4);
-                SUBI(u8, u8, 32);
+                SUBI(u8, u8, 16);
                 MV(x2, x3);
-            } else {
-                AND(x2, ed, xMASK);
+                SRLI(x3, x2, 8);
+                BEQZ(x3, 4+2*4);
+                SUBI(u8, u8, 8);
+                MV(x2, x3);
+                SRLI(x3, x2, 4);
+                BEQZ(x3, 4+2*4);
+                SUBI(u8, u8, 4);
+                MV(x2, x3);
+                ANDI(x2, x2, 0b1111); 
+                TABLE64(x3, (uintptr_t)&lead0tab);
+                ADD(x3, x3, x2);
+                LBU(x2, x3, 0);
+                SUB(gd, u8, x2);
+                MARK2;
             }
-            SRLI(x3, x2, 16);
-            BEQZ(x3, 4+2*4);
-            SUBI(u8, u8, 16);
-            MV(x2, x3);
-            SRLI(x3, x2, 8);
-            BEQZ(x3, 4+2*4);
-            SUBI(u8, u8, 8);
-            MV(x2, x3);
-            SRLI(x3, x2, 4);
-            BEQZ(x3, 4+2*4);
-            SUBI(u8, u8, 4);
-            MV(x2, x3);
-            ANDI(x2, x2, 0b1111); 
-            TABLE64(x3, (uintptr_t)&lead0tab);
-            ADD(x3, x3, x2);
-            LBU(x2, x3, 0);
-            SUB(gd, u8, x2);
-            MARK2;
             ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
             BNE(gd, xZR, 4+4);
             ORI(xFlags, xFlags, 1<<F_ZF);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index eec49a09..7ff2d4db 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -528,6 +528,10 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define SH3ADDUW(rd, rs1, rs2)      EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0111011))
 // Shift left unsigned word (immediate)
 #define SLLIUW(rd, rs1, imm)        EMIT(R_type(0b0000100, imm, rs1, 0b001, rd, 0b0011011))
+// Shift left by 1,2 or 3 and add (rd = X(rs2) + X(rs1)<<x)
+#define SHxADD(rd, rs1, x, rs2)        EMIT(R_type(0b0010000, rs2, rs1, (x)<<1, rd, 0b0110011))
+// Shift unsigned word left by 1,2 or 3 and add (rd = X(rs2) + Wz(rs1)<<x)
+#define SHxADDUW(rd, rs1, x, rs2)      EMIT(R_type(0b0010000, rs2, rs1, (x)<<1, rd, 0b0111011))
 
 //Zbb
 // AND with reverted operand (rs1 & ~rs2)
@@ -537,23 +541,23 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Exclusive NOR (~(rs1 ^ rs2))
 #define XNOR(rd, rs1, rs2)      EMIT(R_type(0b0100000, rs2, rs1, 0b100, rd, 0b0110011))
 // Count leading zero bits
-#define CLZ(rd, rs)             EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, 0b0010011))
+#define CLZ(rd, rs)             EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0010011))
 // Count leading zero bits in word
-#define CLZW(rd, rs)            EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, 0b0011011))
+#define CLZW(rd, rs)            EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011))
 // Count leading zero bits
-#define CLZxw(rd, rs)           EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+#define CLZxw(rd, rs)           EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, rex.w?0b0010011:0b0011011))
 // Count trailing zero bits
-#define CTZ(rd, rs)             EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, 0b0010011))
+#define CTZ(rd, rs)             EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0010011))
 // Count trailing zero bits in word
-#define CTZW(rd, rs)            EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, 0b0011011))
+#define CTZW(rd, rs)            EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0011011))
 // Count trailing zero bits
-#define CTZxw(rd, rs)           EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+#define CTZxw(rd, rs)           EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, rex.w?0b0010011:0b0011011))
 // Count set bits
-#define CPOP(rd, rs)            EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, 0b0010011))
+#define CPOP(rd, rs)            EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0010011))
 // Count set bits in word
-#define CPOPW(rd, rs)           EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, 0b0011011))
+#define CPOPW(rd, rs)           EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0011011))
 // Count set bits
-#define CPOPxw(rd, rs)          EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+#define CPOPxw(rd, rs)          EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, rex.w?0b0010011:0b0011011))
 // Maximum
 #define MAX(rd, rs1, rs2)       EMIT(R_type(0b0000101, rs2, rs1, 0b110, rd, 0b0110011))
 // Unisgned maximum