From d476217f4c53ed5697086fd477cc4058e78fbae1 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Fri, 22 Aug 2025 01:15:59 +0800 Subject: [RV64_DYNAREC] Fixed more scalar avx opcodes (#2959) * [RV64_DYNAREC] Fixed more scalar avx opcodes * oops --- src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c index 28558f14..0a3ef423 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c @@ -65,9 +65,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } for (int i = 0; i < 16; ++i) { - LBU(x3, wback, fixedaddress + i); - ANDI(x4, x3, 128); - BEQZ(x4, 4 + 4 * 2); + LB(x3, wback, fixedaddress + i); + BGE(x3, xZR, 4 + 4 * 2); SB(xZR, gback, gdoffset + i); J(4 + 4 * 4); // continue ANDI(x4, x3, 15); @@ -87,15 +86,14 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i vyoffset = 0; } for (int i = 0; i < 16; ++i) { - LBU(x3, wback, fixedaddress + i); - ANDI(x4, x3, 128); - BEQZ(x4, 4 + 4 * 2); - SB(xZR, gback, gdoffset + i); + LB(x3, wback, fixedaddress + i); + BGE(x3, xZR, 4 + 4 * 2); + SB(xZR, gback, gyoffset + i); J(4 + 4 * 4); // continue ANDI(x4, x3, 15); ADD(x4, x4, vback); - LBU(x4, x4, vxoffset); - SB(x4, gback, gdoffset + i); + LBU(x4, x4, vyoffset); + SB(x4, gback, gyoffset + i); } } else { SD(xZR, gback, gyoffset + 0); @@ -243,7 +241,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LD(x3, gback, gyoffset + 0); SD(x3, gback, gyoffset + 8); } else { - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 2; ++i) { // GY->sd[4+i] = EY->sd[i*2+0]+EY->sd[i*2+1]; LW(x3, wback, fixedaddress + 4 * (i * 2 + 0)); LW(x4, wback, fixedaddress + 4 * (i * 2 + 1)); @@ -434,7 +432,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LD(x3, gback, gyoffset + 0); SD(x3, gback, gyoffset + 8); } else { - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < 2; ++i) { // GY->sd[4+i] = EY->sd[i*2+0]-EY->sd[i*2+1]; LW(x3, wback, fixedaddress + 4 * (i * 2 + 0)); LW(x4, wback, fixedaddress + 4 * (i * 2 + 1)); -- cgit 1.4.1