diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-03-04 15:07:45 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-04 08:07:45 +0100 |
| commit | b015f333765b8b9e23113d206cd8dca3f627f447 (patch) | |
| tree | 35b7ed06874b070d54bfd0df3833f6157e61a15f /src | |
| parent | 2636e7e8b54dd1aa96139b6e609f8920813d8a6e (diff) | |
| download | box64-b015f333765b8b9e23113d206cd8dca3f627f447.tar.gz box64-b015f333765b8b9e23113d206cd8dca3f627f447.zip | |
[RV64_DYNAREC] Minor optimizations to sign mask extraction instructions for vector (#2416)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 11 |
2 files changed, 5 insertions, 12 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index ddcf0b80..b002aa3d 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -298,12 +298,10 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
  SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
  GETGD;
  GETEX_vector(q0, 0, 0, VECTOR_SEW32);
- v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
- VSRL_VI(v0, q0, 31, VECTOR_UNMASKED);
  if (rv64_xtheadvector) {
  // Force the element width to 4bit
  vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL8, 1);
- VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+ VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
  vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL1, 1);
  VMV_X_S(x4, VMASK);
  BEXTI(gd, x4, 12);
@@ -314,7 +312,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
  BEXTI(x5, x4, 0);
  ADDSL(gd, x5, gd, 1, x6);
  } else {
- VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+ VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
  VMV_X_S(gd, VMASK);
  ANDI(gd, gd, 0xF);
  }
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index f206d7d5..eff7aaa4 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -917,21 +917,18 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
  SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
  GETGD;
  GETEX_vector(q0, 0, 0, VECTOR_SEW64);
- v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
  ADDI(x4, xZR, 63);
- VSRL_VX(v0, q0, x4, VECTOR_UNMASKED);
  if (rv64_xtheadvector) {
  // Force the mask element width to 32
  vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1);
- }
- VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
- if (rv64_xtheadvector) {
+ VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
  vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
  VMV_X_S(x4, VMASK);
  ANDI(gd, x4, 1);
  SRLI(x4, x4, 31);
  OR(gd, gd, x4);
  } else {
+ VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
  VMV_X_S(x4, VMASK);
  ANDI(gd, x4, 0b11);
  }
@@ -1863,13 +1860,11 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
  SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
  GETGD;
  GETEX_vector(q0, 0, 0, VECTOR_SEW8);
- v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
- VSRL_VI(v0, q0, 7, VECTOR_UNMASKED);
  if (rv64_xtheadvector) {
  // Force the element width to 1bit
  vector_vsetvli(dyn, ninst, x4, VECTOR_SEW8, VECTOR_LMUL8, 1);
  }
- VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+ VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
  SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
  VMV_X_S(gd, VMASK);
  if (!rv64_xtheadvector) { ZEXTH(gd, gd); }
```