| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-03-07 23:36:53 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-07 16:36:53 +0100 |
| commit | 946c999e2593f902fe46005b39314f6659d0d7d5 (patch) | |
| tree | 54149b31f11ffbc2c1f387910eb3d5ee7cbcc024 /src | |
| parent | c8b6f80f995d0ee00a46b423a360c38bf20cacee (diff) | |
[RV64_DYNAREC] Roll back some faulty optimizations in the xtheadvector path (#2426)
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 4 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 12 |
2 files changed, 12 insertions, 4 deletions
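For context, the three hunks below all sit in what appear to be the sign-mask extraction paths (the MOVMSKPS/MOVMSKPD/PMOVMSKB family): the sign bit of each packed element is gathered into the scalar GPR `gd`. A minimal C sketch of the 32-bit semantics, with a hypothetical `movmskps_ref` helper (illustration only, not box64 code):

```c
#include <stdint.h>

/* Reference semantics only: gather the sign bit of each 32-bit lane
 * into the low bits of the result, matching what the translated
 * code below leaves in gd. */
static uint32_t movmskps_ref(const uint32_t lanes[4])
{
    uint32_t mask = 0;
    for (int i = 0; i < 4; i++)
        mask |= (lanes[i] >> 31) << i; /* bit i <- sign of lane i */
    return mask;
}
```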
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index b002aa3d..428dbcf4 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -299,9 +299,11 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW32);
             if (rv64_xtheadvector) {
+                v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
+                VSRL_VI(v0, q0, 31, VECTOR_UNMASKED);
                 // Force the element width to 4bit
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL8, 1);
-                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
+                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL1, 1);
                 VMV_X_S(x4, VMASK);
                 BEXTI(gd, x4, 12);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index eff7aaa4..0d2c2097 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -917,11 +917,13 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW64);
-            ADDI(x4, xZR, 63);
             if (rv64_xtheadvector) {
+                v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                ADDI(x4, xZR, 63);
+                VSRL_VX(v0, q0, x4, VECTOR_UNMASKED);
                 // Force the mask element width to 32
                 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1);
-                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
+                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                 VMV_X_S(x4, VMASK);
                 ANDI(gd, x4, 1);
@@ -1861,10 +1863,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGD;
             GETEX_vector(q0, 0, 0, VECTOR_SEW8);
             if (rv64_xtheadvector) {
+                v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
+                VSRL_VI(v0, q0, 7, VECTOR_UNMASKED);
                 // Force the element width to 1bit
                 vector_vsetvli(dyn, ninst, x4, VECTOR_SEW8, VECTOR_LMUL8, 1);
+                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+            } else {
+                VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
             }
-            VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED);
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             VMV_X_S(gd, VMASK);
             if (!rv64_xtheadvector) { ZEXTH(gd, gd); }
```
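Semantically, the two forms compute the same per-element predicate on two's-complement values: `VMSLT_VX` against `xZR` tests `x < 0`, while the rolled-back sequence tests `(x >> (width-1)) != 0` with a logical shift. The `VSRL` runs at the original element width, so every element is 0 or 1 before vsetvli is reconfigured, and the `VMSNE_VX` compare no longer depends on the element width, which is presumably why this form is safer on xtheadvector. A minimal sketch of the equivalence for the 64-bit case (illustration only; the function names are hypothetical):

```c
#include <assert.h>
#include <stdint.h>

/* x < 0: the VMSLT_VX form */
static int sign_via_compare(int64_t x) { return x < 0; }

/* (x >> 63) != 0: the VSRL_VX + VMSNE_VX form (logical shift) */
static int sign_via_shift(uint64_t x) { return (x >> 63) != 0; }

int main(void)
{
    const int64_t samples[] = { 0, 1, -1, 42, -42, INT64_MIN, INT64_MAX };
    for (unsigned i = 0; i < sizeof samples / sizeof *samples; i++)
        assert(sign_via_compare(samples[i]) == sign_via_shift((uint64_t)samples[i]));
    return 0;
}
```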