about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-03-04 15:07:45 +0800
committer GitHub <noreply@github.com> 2025-03-04 08:07:45 +0100
commit b015f333765b8b9e23113d206cd8dca3f627f447 (patch)
tree 35b7ed06874b070d54bfd0df3833f6157e61a15f /src
parent 2636e7e8b54dd1aa96139b6e609f8920813d8a6e (diff)
download box64-b015f333765b8b9e23113d206cd8dca3f627f447.tar.gz
box64-b015f333765b8b9e23113d206cd8dca3f627f447.zip
[RV64_DYNAREC] Minor optimizations to sign mask extraction instructions for vector (#2416)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 11
2 files changed, 5 insertions, 12 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index ddcf0b80..b002aa3d 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -298,12 +298,10 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW32);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
-            VSRL_VI(v0, q0, 31, VECTOR_UNMASKED);
             if (rv64_xtheadvector) {
                 // Force the element width to 4bit
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL8, 1);
-                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL1, 1);
                 VMV_X_S(x4, VMASK);
                 BEXTI(gd, x4, 12);
@@ -314,7 +312,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 BEXTI(x5, x4, 0);
                 ADDSL(gd, x5, gd, 1, x6);
             } else {
-                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 VMV_X_S(gd, VMASK);
                 ANDI(gd, gd, 0xF);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index f206d7d5..eff7aaa4 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -917,21 +917,18 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW64);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
             ADDI(x4, xZR, 63);
-            VSRL_VX(v0, q0, x4, VECTOR_UNMASKED);
             if (rv64_xtheadvector) {
                 // Force the mask element width to 32
                 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1);
-            }
-            VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
-            if (rv64_xtheadvector) {
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                 VMV_X_S(x4, VMASK);
                 ANDI(gd, x4, 1);
                 SRLI(x4, x4, 31);
                 OR(gd, gd, x4);
             } else {
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 VMV_X_S(x4, VMASK);
                 ANDI(gd, x4, 0b11);
             }
@@ -1863,13 +1860,11 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW8);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
-            VSRL_VI(v0, q0, 7, VECTOR_UNMASKED);
             if (rv64_xtheadvector) {
                 // Force the element width to 1bit
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW8, VECTOR_LMUL8, 1);
             }
-            VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+            VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             VMV_X_S(gd, VMASK);
             if (!rv64_xtheadvector) { ZEXTH(gd, gd); }