From b015f333765b8b9e23113d206cd8dca3f627f447 Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Tue, 4 Mar 2025 15:07:45 +0800
Subject: [RV64_DYNAREC] Minor optimizations to sign mask extraction
 instructions for vector (#2416)

---
 src/dynarec/rv64/dynarec_rv64_0f_vector.c   |  6 ++----
 src/dynarec/rv64/dynarec_rv64_660f_vector.c | 11 +++--------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index ddcf0b80..b002aa3d 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -298,12 +298,10 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW32);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
-            VSRL_VI(v0, q0, 31, VECTOR_UNMASKED);
             if (rv64_xtheadvector) {
                 // Force the element width to 4bit
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL8, 1);
-                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL1, 1);
                 VMV_X_S(x4, VMASK);
                 BEXTI(gd, x4, 12);
@@ -314,7 +312,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 BEXTI(x5, x4, 0);
                 ADDSL(gd, x5, gd, 1, x6);
             } else {
-                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 VMV_X_S(gd, VMASK);
                 ANDI(gd, gd, 0xF);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index f206d7d5..eff7aaa4 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -917,21 +917,18 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW64);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
             ADDI(x4, xZR, 63);
-            VSRL_VX(v0, q0, x4, VECTOR_UNMASKED);
             if (rv64_xtheadvector) {
                 // Force the mask element width to 32
                 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1);
-            }
-            VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
-            if (rv64_xtheadvector) {
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                 VMV_X_S(x4, VMASK);
                 ANDI(gd, x4, 1);
                 SRLI(x4, x4, 31);
                 OR(gd, gd, x4);
             } else {
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
                 VMV_X_S(x4, VMASK);
                 ANDI(gd, x4, 0b11);
             }
@@ -1863,13 +1860,11 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW8);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
-            VSRL_VI(v0, q0, 7, VECTOR_UNMASKED);
             if (rv64_xtheadvector) {
                 // Force the element width to 1bit
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW8, VECTOR_LMUL8, 1);
             }
-            VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+            VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             VMV_X_S(gd, VMASK);
             if (!rv64_xtheadvector) { ZEXTH(gd, gd); }
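
For context, the patch relies on a simple identity: for a signed element x of width w, the old sequence (logical shift right by w-1 with VSRL, then compare the result against zero with VMSNE) and the new single VMSLT comparison against zero both yield 1 exactly when the sign bit of x is set. Dropping the shift also removes the scratch vector v0, which is why the fpu_get_scratch_lmul calls disappear. The following standalone C program is a minimal scalar sketch of that equivalence; it is illustration only, not box64 code, and the helper function names are made up for this example.

/* Scalar analogue of the vector change in this patch: two ways to
 * extract the sign bit of a 32-bit element.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int signbit_shift_then_compare(int32_t x)
{
    /* old approach: logical shift isolates the sign bit, then test != 0 */
    return ((uint32_t)x >> 31) != 0;
}

static int signbit_signed_less_than_zero(int32_t x)
{
    /* new approach: one signed comparison against zero */
    return x < 0;
}

int main(void)
{
    const int32_t samples[] = { 0, 1, -1, INT32_MIN, INT32_MAX, -123456 };
    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        assert(signbit_shift_then_compare(samples[i]) ==
               signbit_signed_less_than_zero(samples[i]));
    }
    puts("both sign-bit extractions agree");
    return 0;
}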