about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2024-03-06 16:58:04 +0800
committer: GitHub <noreply@github.com> 2024-03-06 09:58:04 +0100
commit: 584b258241b12f90d575ab43d9602ee9a5566d3d (patch)
tree: 886d74ea299f3f25672aa8810e32054fea37c052 /src
parent: fb13ce2bafc05697e84b095326ba95b3f5b0ca8b (diff)
download: box64-584b258241b12f90d575ab43d9602ee9a5566d3d.tar.gz
          box64-584b258241b12f90d575ab43d9602ee9a5566d3d.zip
[RV64_DYNAREC] Added 66 0F 38 61 PCMPESTRI opcode and some refactors too (#1337)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 54
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 76
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 50
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 61
4 files changed, 108 insertions, 133 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 8b9eb63c..bfe29a2c 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1677,18 +1677,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             ORI(xFlags, xFlags, 1 << F_ZF);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CTZxw(gd, ed);
-            } else {
-                NEG(x2, ed);
-                AND(x2, x2, ed);
-                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-                MUL(x2, x2, x3);
-                SRLI(x2, x2, 64 - 6);
-                TABLE64(x1, (uintptr_t)&deBruijn64tab);
-                ADD(x1, x1, x2);
-                LBU(gd, x1, 0);
-            }
+            // gd is undefined if ed is all zeros, don't worry.
+            CTZxw(gd, ed, rex.w, x1, x2);
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
             break;
         case 0xBD:
@@ -1707,43 +1697,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
-            if (rv64_zbb) {
-                MOV32w(x1, rex.w ? 63 : 31);
-                CLZxw(gd, ed);
-                SUB(gd, x1, gd);
-            } else {
-                if (ed != gd)
-                    u8 = gd;
-                else
-                    u8 = x1;
-                ADDI(u8, xZR, 0);
-                if (rex.w) {
-                    MV(x2, ed);
-                    SRLI(x3, x2, 32);
-                    BEQZ(x3, 4 + 2 * 4);
-                    ADDI(u8, u8, 32);
-                    MV(x2, x3);
-                } else {
-                    AND(x2, ed, xMASK);
-                }
-                SRLI(x3, x2, 16);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 16);
-                MV(x2, x3);
-                SRLI(x3, x2, 8);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 8);
-                MV(x2, x3);
-                SRLI(x3, x2, 4);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 4);
-                MV(x2, x3);
-                ANDI(x2, x2, 0b1111);
-                TABLE64(x3, (uintptr_t)&lead0tab);
-                ADD(x3, x3, x2);
-                LBU(x2, x3, 0);
-                ADD(gd, u8, x2);
-            }
+            CLZxw(gd, ed, rex.w, x1, x2, x3);
+            ADDI(x1, xZR, rex.w ? 63 : 31);
+            SUB(gd, x1, gd);
             break;
         case 0xBE:
             INST_NAME("MOVSX Gd, Eb");
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 2ace7fd4..e1553d69 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -21,6 +21,7 @@
 #include "dynarec_rv64_private.h"
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
+#include "emu/x64compstrings.h"
 
 uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
 {
@@ -898,6 +899,42 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         SW(x3, gback, gdoffset + i * 4);
                     }
                     break;
+                case 0x61:
+                    INST_NAME("PCMPESTRI Gx, Ex, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    nextop = F8;
+                    GETG;
+                    sse_reflect_reg(dyn, ninst, gd);
+                    ADDI(x3, xEmu, offsetof(x64emu_t, xmm[gd])); // x3 = &emu->xmm[gd]
+                    if (MODREG) {
+                        ed = (nextop & 7) + (rex.b << 3);
+                        sse_reflect_reg(dyn, ninst, ed);
+                        ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed])); // x1 = &emu->xmm[ed]
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 1);
+                        if (wback != x1) { // geted() reports the address register through wback, not ed
+                            MV(x1, wback);
+                        }
+                    }
+                    // remaining helper arguments: x2 <- RDX, x4 <- RAX, x5 <- imm8
+                    MV(x2, xRDX);
+                    MV(x4, xRAX);
+                    u8 = F8;
+                    MOV32w(x5, u8);
+                    CALL(sse42_compare_string_explicit_len, x1);
+                    ZEROUP(x1); // x1 is treated as the helper's result from here on
+                    BNEZ_MARK(x1);
+                    MOV32w(xRCX, (u8 & 1) ? 8 : 16); // result == 0: RCX gets 8 or 16 depending on imm8 bit 0
+                    B_NEXT_nocond;
+                    MARK;
+                    if (u8 & 0b1000000) {
+                        CLZxw(xRCX, x1, 0, x2, x3, x4); // imm8 bit 6 set: index of the highest set bit
+                        ADDI(x2, xZR, 31);
+                        SUB(xRCX, x2, xRCX);
+                    } else {
+                        CTZxw(xRCX, x1, 0, x2, x4); // lowest set bit of the result in x1 (non-zero here)
+                    }
+                    break;
                 case 0xDB:
                     INST_NAME("AESIMC Gx, Ex"); // AES-NI
                     nextop = F8;
@@ -2397,18 +2434,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             ORI(xFlags, xFlags, 1 << F_ZF);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CTZxw(gd, ed);
-            } else {
-                NEG(x2, ed);
-                AND(x2, x2, ed);
-                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-                MUL(x2, x2, x3);
-                SRLI(x2, x2, 64 - 6);
-                TABLE64(x1, (uintptr_t)&deBruijn64tab);
-                ADD(x1, x1, x2);
-                LBU(gd, x1, 0);
-            }
+            // gd is undefined if ed is all zeros, don't worry.
+            CTZxw(gd, ed, 0, x1, x2);
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
             GWBACK;
             break;
@@ -2424,28 +2451,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
-            if (rv64_zbb) {
-                MOV32w(x1, rex.w ? 63 : 31);
-                CLZxw(gd, ed);
-                SUB(gd, x1, gd);
-            } else {
-                u8 = gd;
-                ADDI(u8, xZR, 0);
-                AND(x2, ed, xMASK);
-                SRLI(x3, x2, 8);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 8);
-                MV(x2, x3);
-                SRLI(x3, x2, 4);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 4);
-                MV(x2, x3);
-                ANDI(x2, x2, 0b1111);
-                TABLE64(x3, (uintptr_t)&lead0tab);
-                ADD(x3, x3, x2);
-                LBU(x2, x3, 0);
-                ADD(gd, u8, x2);
-            }
+            CLZxw(gd, ed, 0, x1, x2, x3);
+            ADDI(x1, xZR, rex.w ? 63 : 31);
+            SUB(gd, x1, gd);
             GWBACK;
             break;
         case 0xBE:
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 45a6a2cf..bebcc551 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -408,18 +408,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MOV32w(gd, rex.w ? 64 : 32);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CTZxw(gd, ed);
-            } else {
-                NEG(x2, ed);
-                AND(x2, x2, ed);
-                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-                MUL(x2, x2, x3);
-                SRLI(x2, x2, 64 - 6);
-                TABLE64(x1, (uintptr_t)&deBruijn64tab);
-                ADD(x1, x1, x2);
-                LBU(gd, x1, 0);
-            }
+            CTZxw(gd, ed, rex.w, x1, x2);
             BNE(gd, xZR, 4 + 4);
             ORI(xFlags, xFlags, 1 << F_ZF);
             break;
@@ -440,42 +429,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             ORI(xFlags, xFlags, 1 << F_CF);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CLZxw(gd, ed);
-            } else {
-                if (ed != gd)
-                    u8 = gd;
-                else
-                    u8 = x1;
-                ADDI(u8, xZR, rex.w ? 63 : 31);
-                if (rex.w) {
-                    MV(x2, ed);
-                    SRLI(x3, x2, 32);
-                    BEQZ(x3, 4 + 2 * 4);
-                    SUBI(u8, u8, 32);
-                    MV(x2, x3);
-                } else {
-                    AND(x2, ed, xMASK);
-                }
-                SRLI(x3, x2, 16);
-                BEQZ(x3, 4 + 2 * 4);
-                SUBI(u8, u8, 16);
-                MV(x2, x3);
-                SRLI(x3, x2, 8);
-                BEQZ(x3, 4 + 2 * 4);
-                SUBI(u8, u8, 8);
-                MV(x2, x3);
-                SRLI(x3, x2, 4);
-                BEQZ(x3, 4 + 2 * 4);
-                SUBI(u8, u8, 4);
-                MV(x2, x3);
-                ANDI(x2, x2, 0b1111);
-                TABLE64(x3, (uintptr_t)&lead0tab);
-                ADD(x3, x3, x2);
-                LBU(x2, x3, 0);
-                SUB(gd, u8, x2);
-                MARK2;
-            }
+            CLZxw(gd, ed, rex.w, x1, x2, x3);
             ANDI(xFlags, xFlags, ~((1 << F_ZF) | (1 << F_CF)));
             BNE(gd, xZR, 4 + 4);
             ORI(xFlags, xFlags, 1 << F_ZF);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 6b675f6e..87e5be54 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -723,13 +723,70 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Count leading zero bits in word
 #define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011))
 // Count leading zero bits
-#define CLZxw(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, rex.w ? 0b0010011 : 0b0011011))
+#define CLZxw(rd, rs, x, s1, s2, s3) /* x: 64-bit op; uses u8, s1-s3 */ \
+    if (rv64_zbb) {                                                     \
+        if (x)                                                          \
+            CLZ(rd, rs);                                                \
+        else                                                            \
+            CLZW(rd, rs);                                               \
+    } else { /* no Zbb: narrow the range, then lead0tab lookup */       \
+        if (rs != rd) /* accumulate in rd unless it aliases rs */       \
+            u8 = rd;                                                    \
+        else                                                            \
+            u8 = s1;                                                    \
+        ADDI(u8, xZR, (x) ? 63 : 31);                                   \
+        if (x) {                                                        \
+            MV(s2, rs);                                                 \
+            SRLI(s3, s2, 32);                                           \
+            BEQZ(s3, 4 + 2 * 4); /* skip the next two insns */          \
+            SUBI(u8, u8, 32);                                           \
+            MV(s2, s3);                                                 \
+        } else {                                                        \
+            AND(s2, rs, xMASK);                                         \
+        }                                                               \
+        SRLI(s3, s2, 16);                                               \
+        BEQZ(s3, 4 + 2 * 4);                                            \
+        SUBI(u8, u8, 16);                                               \
+        MV(s2, s3);                                                     \
+        SRLI(s3, s2, 8);                                                \
+        BEQZ(s3, 4 + 2 * 4);                                            \
+        SUBI(u8, u8, 8);                                                \
+        MV(s2, s3);                                                     \
+        SRLI(s3, s2, 4);                                                \
+        BEQZ(s3, 4 + 2 * 4);                                            \
+        SUBI(u8, u8, 4);                                                \
+        MV(s2, s3);                                                     \
+        ANDI(s2, s2, 0b1111);                                           \
+        TABLE64(s3, (uintptr_t)&lead0tab);                              \
+        ADD(s3, s3, s2);                                                \
+        LBU(s2, s3, 0);                                                 \
+        SUB(rd, u8, s2);                                                \
+    }
+
 // Count trailing zero bits
 #define CTZ(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0010011))
 // Count trailing zero bits in word
 #define CTZW(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0011011))
 // Count trailing zero bits
-#define CTZxw(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, rex.w ? 0b0010011 : 0b0011011))
+// BEWARE: handle the all-zero source yourself; without Zbb all 64 bits of rs
+//         are scanned, so for x == 0 the high 32 bits must be cleared/ignored.
+#define CTZxw(rd, rs, x, s1, s2)                \
+    if (rv64_zbb) {                             \
+        if (x)                                  \
+            CTZ(rd, rs);                        \
+        else                                    \
+            CTZW(rd, rs);                       \
+    } else {                                    \
+        NEG(s2, rs);                            \
+        AND(s2, s2, rs); /* lowest set bit */   \
+        TABLE64(s1, 0x03f79d71b4ca8b09ULL);     \
+        MUL(s2, s2, s1); /* de Bruijn hash */   \
+        SRLI(s2, s2, 64 - 6); /* top 6 bits */  \
+        TABLE64(s1, (uintptr_t)&deBruijn64tab); \
+        ADD(s1, s1, s2);                        \
+        LBU(rd, s1, 0);                         \
+    }
+
 // Count set bits
 #define CPOP(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0010011))
 // Count set bits in word