diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-08-19 20:17:10 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-19 14:17:10 +0200 |
| commit | 44448774a7be9ad7dbc97ff566bd1166974dbf75 (patch) | |
| tree | e20d50bfbd713d44cb370324fa0d51d87fd98805 | |
| parent | a280884f0986f9658547d2bdd4b515e0beb28259 (diff) | |
| download | box64-44448774a7be9ad7dbc97ff566bd1166974dbf75.tar.gz box64-44448774a7be9ad7dbc97ff566bd1166974dbf75.zip | |
[RV64_DYNAREC] Added more scalar avx opcodes (#2952)
| -rw-r--r-- | CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f38.c | 30 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 58 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c | 87 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
6 files changed, 164 insertions, 16 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 527e913f..b46ac047 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1059,6 +1059,7 @@ if(RV64_DYNAREC)
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_0f.c"
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c"
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c"
+    "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c"
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c"
     )
 endif()
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c
index 4e33ca33..aca87f82 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f38.c
@@ -313,31 +313,29 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
             nextop = F8;
             SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
             GETGX();
-            GETEX(x2, 0, 8);
+            GETEX(x1, 0, 8);
             CLEAR_FLAGS();
             SET_DFNONE();
             IFX (X_ZF | X_CF) {
-                LD(x5, wback, fixedaddress + 0);
-                LD(x6, wback, fixedaddress + 8);
+                LD(x2, wback, fixedaddress + 0); // x2:x3 = Ex, x4:x5 = Gx, loaded once for both flag tests
+                LD(x3, wback, fixedaddress + 8);
+                LD(x4, gback, gdoffset + 0);
+                LD(x5, gback, gdoffset + 8);
 
                 IFX (X_ZF) {
-                    LD(x3, gback, gdoffset + 0);
-                    LD(x4, gback, gdoffset + 8);
-                    AND(x3, x3, x5);
-                    AND(x4, x4, x6);
-                    OR(x3, x3, x4);
-                    BNEZ(x3, 8);
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    BNEZ(x6, 4 + 4); // (Gx & Ex) != 0: skip the ORI, ZF stays clear
                     ORI(xFlags, xFlags, 1 << F_ZF);
                 }
                 IFX (X_CF) {
-                    LD(x3, gback, gdoffset + 0);
-                    NOT(x3, x3);
-                    LD(x4, gback, gdoffset + 8);
                     NOT(x4, x4);
-                    AND(x3, x3, x5);
-                    AND(x4, x4, x6);
-                    OR(x3, x3, x4);
-                    BNEZ(x3, 8);
+                    NOT(x5, x5);
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    BNEZ(x6, 4 + 4); // (~Gx & Ex) != 0: skip the ORI, CF stays clear
                     ORI(xFlags, xFlags, 1 << F_CF);
                 }
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx.c b/src/dynarec/rv64/dynarec_rv64_avx.c
index 6209ccff..12cce037 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx.c
@@ -56,6 +56,8 @@ uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int n
         addr = dynarec64_AVX_F3_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if ((vex.m == VEX_M_0F38) && (vex.p == VEX_P_66))
         addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if ((vex.m == VEX_M_0F3A) && (vex.p == VEX_P_66))
+        addr = dynarec64_AVX_66_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else {
         DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
index fc263e8f..bef35991 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
@@ -580,6 +580,64 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 SD(xZR, gback, gyoffset + 8);
             }
             break;
+        case 0x17:
+            INST_NAME("VPTEST Gx, Ex");
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            GETGX();
+            CLEAR_FLAGS();
+            SET_DFNONE();
+            IFX (X_ZF | X_CF) {
+                LD(x2, wback, fixedaddress + 0); // low lane: x2:x3 = Ex, x4:x5 = Gx
+                LD(x3, wback, fixedaddress + 8);
+                LD(x4, gback, gdoffset + 0);
+                LD(x5, gback, gdoffset + 8);
+
+                IFX (X_ZF) {
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    BNEZ(x6, 4 + 4); // (Gx & Ex) != 0: skip the ORI, ZF stays clear
+                    ORI(xFlags, xFlags, 1 << F_ZF);
+                }
+                IFX (X_CF) {
+                    NOT(x4, x4);
+                    NOT(x5, x5);
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    BNEZ(x6, 4 + 4); // branch on the combined result in x6 (x3 only holds Ex's high qword)
+                    ORI(xFlags, xFlags, 1 << F_CF);
+                }
+            }
+            if (vex.l) {
+                GETEY();
+                LD(x2, wback, fixedaddress + 0); // upper lane, same register layout as the low lane
+                LD(x3, wback, fixedaddress + 8);
+                LD(x4, gback, gyoffset + 0);
+                LD(x5, gback, gyoffset + 8);
+
+                IFX (X_ZF) {
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    // ZF was set from the low lane only; drop it again if the upper lane has a surviving bit
+                    BEQZ(x6, 4 + 4);
+                    ANDI(xFlags, xFlags, ~(1 << F_ZF));
+                }
+                IFX (X_CF) {
+                    NOT(x4, x4);
+                    NOT(x5, x5);
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    // CF was set from the low lane only; drop it again if the upper lane has a surviving bit
+                    BEQZ(x6, 4 + 4);
+                    ANDI(xFlags, xFlags, ~(1 << F_CF));
+                }
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
new file mode 100644
index 00000000..d3a3b702
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "box64cpu.h"
+#include "emu/x64emu_private.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "rv64_printer.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_functions.h"
+#include "../dynarec_helper.h"
+
+uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip;
+    (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2, gback, vback;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u, u64;
+    int64_t j64;
+    int64_t fixedaddress, gdoffset, vxoffset, gyoffset, vyoffset;
+    int unscaled;
+
+    rex_t rex = vex.rex;
+
+    switch (opcode) {
+        case 0x4A:
+            INST_NAME("VBLENDVPS Gx, Vx, Ex, XMMImm8");
+            nextop = F8;
+            u8 = geted_ib(dyn, addr, ninst, nextop) >> 4; // mask register index = high nibble of the trailing imm8
+            GETEX(x1, 1, vex.l ? 28 : 12);
+            GETGX();
+            GETVX();
+            GETGY();
+            F8; // consume the imm8 already peeked above
+            for (int i = 0; i < 4; ++i) {
+                LW(x3, xEmu, offsetof(x64emu_t, xmm) + u8 * 16 + i * 4); // mask dword for element i
+                LWU(x4, wback, fixedaddress + i * 4);
+                LWU(x5, vback, vxoffset + i * 4);
+                BGE(x3, xZR, 4 + 4); // mask >= 0: skip the MV and keep the Vx element
+                MV(x5, x4);
+                MV(x3, x5);
+                SW(x3, gback, gdoffset + i * 4);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 4; ++i) {
+                    LW(x3, xEmu, offsetof(x64emu_t, ymm) + u8 * 16 + i * 4);
+                    LWU(x4, wback, fixedaddress + i * 4);
+                    LWU(x5, vback, vyoffset + i * 4); // NOTE(review): no visible call assigns vyoffset - confirm GETVX()/GETGY() set it
+                    BGE(x3, xZR, 4 + 4);
+                    MV(x5, x4);
+                    MV(x3, x5);
+                    SW(x3, gback, gyoffset + i * 4);
+                }
+            } else {
+                SD(xZR, gback, gyoffset + 0); // 128-bit form: zero the upper lane of the destination
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 8f01750d..b5ac7225 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1293,6 +1293,7 @@
 #define dynarec64_AVX_0F STEPNAME(dynarec64_AVX_0F)
 #define dynarec64_AVX_66_0F STEPNAME(dynarec64_AVX_66_0F)
 #define dynarec64_AVX_66_0F38 STEPNAME(dynarec64_AVX_66_0F38)
+#define dynarec64_AVX_66_0F3A STEPNAME(dynarec64_AVX_66_0F3A)
 #define dynarec64_AVX_F3_0F STEPNAME(dynarec64_AVX_F3_0F)
 
 #define geted STEPNAME(geted)
@@ -1745,6 +1746,7 @@ uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 
 #if STEP < 2