diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-08-18 20:12:24 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-18 14:12:24 +0200 |
| commit | 1c2e763ffbff668851ab0845dee3d4f2072a0e36 (patch) | |
| tree | c1e932ceff58067f27a829168117fe3f468f0a46 /src | |
| parent | 5f144d8ddde847fd6e99d1739940cfd3cbf3779e (diff) | |
| download | box64-1c2e763ffbff668851ab0845dee3d4f2072a0e36.tar.gz box64-1c2e763ffbff668851ab0845dee3d4f2072a0e36.zip | |
[RV64_DYNAREC] Added a few more scalar AVX 66 0F38 opcodes (#2949)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f38.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_0f.c | 22 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f.c | 26 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 402 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c | 22 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 15 |
7 files changed, 451 insertions(+), 42 deletions(-)
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index df87f2f7..b3088fd2 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -70,9 +70,9 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 16; ++i) { LBU(x3, wback, fixedaddress + i); ANDI(x4, x3, 128); - BEQZ(x4, 12); + BEQZ(x4, 4 + 4 * 2); SB(xZR, gback, gdoffset + i); - BEQZ(xZR, 20); // continue + J(4 + 4 * 4); // continue ANDI(x4, x3, 15); ADD(x4, x4, x5); LBU(x4, x4, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_avx.c b/src/dynarec/rv64/dynarec_rv64_avx.c index 0d4a6f2c..6209ccff 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx.c +++ b/src/dynarec/rv64/dynarec_rv64_avx.c @@ -54,6 +54,8 @@ uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int n addr = dynarec64_AVX_66_0F(dyn, addr, ip, ninst, vex, ok, need_epilog); else if ((vex.m == VEX_M_0F) && (vex.p == VEX_P_F3)) addr = dynarec64_AVX_F3_0F(dyn, addr, ip, ninst, vex, ok, need_epilog); + else if ((vex.m == VEX_M_0F38) && (vex.p == VEX_P_66)) + addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog); else { DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_avx_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_0f.c index 95207210..6b4b812d 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_0f.c @@ -30,7 +30,7 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in uint8_t opcode = F8; uint8_t nextop, u8; uint8_t gd, ed, vd; - uint8_t wback, wb1, wb2, gback, vback, gyback; + uint8_t wback, wb1, wb2, gback, vback; uint8_t eb1, eb2, gb1, gb2; int32_t i32, i32_; int cacheupd = 0; @@ -59,12 +59,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in if (vex.l) { GETEY(); LD(x3, wback, fixedaddress); - SD(x3, gyback, gyoffset); + SD(x3, gback, gyoffset); LD(x3, wback, fixedaddress + 8); - 
SD(x3, gyback, gyoffset + 8); + SD(x3, gback, gyoffset + 8); } else { - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); } break; case 0x29: @@ -79,9 +79,9 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in if (vex.l) { GETEY(); GETGY(); - LD(x3, gyback, gyoffset); + LD(x3, gback, gyoffset); SD(x3, wback, fixedaddress); - LD(x3, gyback, gyoffset + 8); + LD(x3, gback, gyoffset + 8); SD(x3, wback, fixedaddress + 8); } else if (MODREG) { GETEY(); @@ -147,11 +147,11 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in FLW(s1, wback, fixedaddress + 12); FCVTDS(s0, s0); FCVTDS(s1, s1); - FSD(s0, gyback, gyoffset + 0); - FSD(s1, gyback, gyoffset + 8); + FSD(s0, gback, gyoffset + 0); + FSD(s1, gback, gyoffset + 8); } else { - FSD(xZR, gyback, gyoffset + 0); - FSD(xZR, gyback, gyoffset + 8); + FSD(xZR, gback, gyoffset + 0); + FSD(xZR, gback, gyoffset + 8); } break; default: diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c index 64d2a93f..a360cfa9 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c @@ -30,7 +30,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, uint8_t opcode = F8; uint8_t nextop, u8; uint8_t gd, ed, vd; - uint8_t wback, wb1, wb2, gback, vback, gyback; + uint8_t wback, wb1, wb2, gback, vback; uint8_t eb1, eb2, gb1, gb2; int32_t i32, i32_; int cacheupd = 0; @@ -59,12 +59,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, if (vex.l) { GETEY(); LD(x3, wback, fixedaddress); - SD(x3, gyback, gyoffset); + SD(x3, gback, gyoffset); LD(x3, wback, fixedaddress + 8); - SD(x3, gyback, gyoffset + 8); + SD(x3, gback, gyoffset + 8); } else { - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); } break; 
case 0x6E: @@ -75,8 +75,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETGY(); SD(ed, gback, gdoffset); SD(xZR, gback, gdoffset + 8); - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); break; case 0x6F: INST_NAME("VMOVDQA Gx, Ex"); @@ -91,12 +91,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, if (vex.l) { GETEY(); LD(x3, wback, fixedaddress); - SD(x3, gyback, gyoffset); + SD(x3, gback, gyoffset); LD(x3, wback, fixedaddress + 8); - SD(x3, gyback, gyoffset + 8); + SD(x3, gback, gyoffset + 8); } else { - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); } break; case 0x7E: @@ -136,9 +136,9 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(x3, wback, fixedaddress + 8); if (vex.l) { GETEY(); - LD(x3, gyback, gyoffset); + LD(x3, gback, gyoffset); SD(x3, wback, fixedaddress); - LD(x3, gyback, gyoffset + 8); + LD(x3, gback, gyoffset + 8); SD(x3, wback, fixedaddress + 8); } else if (MODREG) { SD(xZR, wback, fixedaddress); diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c new file mode 100644 index 00000000..bb41fbdf --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c @@ -0,0 +1,402 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> + +#include "debug.h" +#include "box64context.h" +#include "box64cpu.h" +#include "emu/x64emu_private.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "my_cpuid.h" +#include "emu/x87emu_private.h" +#include "emu/x64shaext.h" + +#include "rv64_printer.h" +#include "dynarec_rv64_private.h" +#include "dynarec_rv64_functions.h" +#include "../dynarec_helper.h" + +uintptr_t 
dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog) +{ + (void)ip; + (void)need_epilog; + + uint8_t opcode = F8; + uint8_t nextop, u8; + uint8_t gd, ed, vd; + uint8_t wback, wb1, wb2, gback, vback; + uint8_t eb1, eb2, gb1, gb2; + int32_t i32, i32_; + int cacheupd = 0; + int v0, v1, v2; + int q0, q1, q2; + int d0, d1, d2; + int s0; + uint64_t tmp64u, u64; + int64_t j64; + int64_t fixedaddress, gdoffset, vxoffset, gyoffset, vyoffset; + int unscaled; + + rex_t rex = vex.rex; + + switch (opcode) { + case 0x00: + INST_NAME("VPSHUFB Gx, Vx, Ex"); + nextop = F8; + GETEX(x1, 0, vex.l ? 31 : 15); + GETGX(); + GETVX(); + GETGY(); + GETVY(); + + if (gd == vex.v) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, vback, vxoffset + 0); + LD(x4, vback, vxoffset + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + vback = x5; + vxoffset = 0; + } + + for (int i = 0; i < 16; ++i) { + LBU(x3, wback, fixedaddress + i); + ANDI(x4, x3, 128); + BEQZ(x4, 4 + 4 * 2); + SB(xZR, gback, gdoffset + i); + J(4 + 4 * 4); // continue + ANDI(x4, x3, 15); + ADD(x4, x4, vback); + LBU(x4, x4, vxoffset); + SB(x4, gback, gdoffset + i); + } + + if (vex.l) { + GETEY(); + if (gd == vex.v) { + LD(x3, vback, vyoffset + 0); + LD(x4, vback, vyoffset + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + vback = x5; + vyoffset = 0; + } + for (int i = 0; i < 16; ++i) { + LBU(x3, wback, fixedaddress + i); + ANDI(x4, x3, 128); + BEQZ(x4, 4 + 4 * 2); + SB(xZR, gback, gdoffset + i); + J(4 + 4 * 4); // continue + ANDI(x4, x3, 15); + ADD(x4, x4, vback); + LBU(x4, x4, vxoffset); + SB(x4, gback, gdoffset + i); + } + } else { + SD(xZR, gback, gyoffset + 0); + SD(xZR, gback, gyoffset + 8); + } + break; + case 0x01: + INST_NAME("VPHADDW Gx, Vx, Ex"); + nextop = F8; + GETEX(x1, 0, vex.l ? 
46 : 14); + GETGX(); + GETVX(); + GETGY(); + GETVY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 4; ++i) { + // GX->sw[i] = VX->sw[i*2+0]+VX->sw[i*2+1]; + LH(x3, vback, vxoffset + 2 * (i * 2 + 0)); + LH(x4, vback, vxoffset + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SH(x3, gback, gdoffset + 2 * i); + } + if (MODREG && ed == vex.v) { + // GX->q[1] = GX->q[0]; + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); + } else { + for (int i = 0; i < 4; ++i) { + // GX->sw[4+i] = EX->sw[i*2+0]+EX->sw[i*2+1]; + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SH(x3, gback, gdoffset + 2 * (4 + i)); + } + } + if (vex.l) { + GETEY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 4; ++i) { + // GY->sw[i] = VY->sw[i*2+0]+VY->sw[i*2+1]; + LH(x3, vback, vyoffset + 2 * (i * 2 + 0)); + LH(x4, vback, vyoffset + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SH(x3, gback, gyoffset + 2 * i); + } + if (MODREG && ed == vex.v) { + // GY->q[1] = GY->q[0]; + LD(x3, gback, gyoffset + 0); + SD(x3, gback, gyoffset + 8); + } else { + for (int i = 0; i < 4; ++i) { + // GY->sw[4+i] = EY->sw[i*2+0]+EY->sw[i*2+1]; + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SH(x3, gback, gyoffset + 2 * (4 + i)); + } + } + } else { + SD(xZR, gback, gyoffset + 0); + SD(xZR, gback, gyoffset + 8); + } + break; + case 0x02: + INST_NAME("VPHADDD Gx, Vx, Ex"); + nextop = F8; + GETEX(x1, 0, vex.l ? 
44 : 12); + GETGX(); + GETVX(); + GETGY(); + GETVY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 2; ++i) { + // GX->sd[i] = VX->sd[i*2+0]+VX->sd[i*2+1]; + LW(x3, vback, vxoffset + 4 * (i * 2 + 0)); + LW(x4, vback, vxoffset + 4 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SW(x3, gback, gdoffset + 4 * i); + } + if (MODREG && ed == vex.v) { + // GX->q[1] = GX->q[0]; + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); + } else { + for (int i = 0; i < 2; ++i) { + // GX->sd[4+i] = EX->sd[i*2+0]+EX->sd[i*2+1]; + LW(x3, wback, fixedaddress + 4 * (i * 2 + 0)); + LW(x4, wback, fixedaddress + 4 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SW(x3, gback, gdoffset + 4 * (2 + i)); + } + } + if (vex.l) { + GETEY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 2; ++i) { + // GY->sd[i] = VY->sd[i*2+0]+VY->sd[i*2+1]; + LW(x3, vback, vyoffset + 4 * (i * 2 + 0)); + LW(x4, vback, vyoffset + 4 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SW(x3, gback, gyoffset + 4 * i); + } + if (MODREG && ed == vex.v) { + // GY->q[1] = GY->q[0]; + LD(x3, gback, gyoffset + 0); + SD(x3, gback, gyoffset + 8); + } else { + for (int i = 0; i < 4; ++i) { + // GY->sd[4+i] = EY->sd[i*2+0]+EY->sd[i*2+1]; + LW(x3, wback, fixedaddress + 4 * (i * 2 + 0)); + LW(x4, wback, fixedaddress + 4 * (i * 2 + 1)); + ADDW(x3, x3, x4); + SW(x3, gback, gyoffset + 4 * (2 + i)); + } + } + } else { + SD(xZR, gback, gyoffset + 0); + SD(xZR, gback, gyoffset + 8); + } + break; + case 0x05: + INST_NAME("VPHSUBW Gx, Vx, Ex"); + nextop = F8; + GETEX(x1, 0, vex.l ? 
46 : 14); + GETGX(); + GETVX(); + GETGY(); + GETVY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 4; ++i) { + // GX->sw[i] = VX->sw[i*2+0]-VX->sw[i*2+1]; + LH(x3, vback, vxoffset + 2 * (i * 2 + 0)); + LH(x4, vback, vxoffset + 2 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SH(x3, gback, gdoffset + 2 * i); + } + if (MODREG && ed == vex.v) { + // GX->q[1] = GX->q[0]; + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); + } else { + for (int i = 0; i < 4; ++i) { + // GX->sw[4+i] = EX->sw[i*2+0]-EX->sw[i*2+1]; + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SH(x3, gback, gdoffset + 2 * (4 + i)); + } + } + if (vex.l) { + GETEY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 4; ++i) { + // GY->sw[i] = VY->sw[i*2+0]-VY->sw[i*2+1]; + LH(x3, vback, vyoffset + 2 * (i * 2 + 0)); + LH(x4, vback, vyoffset + 2 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SH(x3, gback, gyoffset + 2 * i); + } + if (MODREG && ed == vex.v) { + // GY->q[1] = GY->q[0]; + LD(x3, gback, gyoffset + 0); + SD(x3, gback, gyoffset + 8); + } else { + for (int i = 0; i < 4; ++i) { + // GY->sw[4+i] = EY->sw[i*2+0]-EY->sw[i*2+1]; + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SH(x3, gback, gyoffset + 2 * (4 + i)); + } + } + } else { + SD(xZR, gback, gyoffset + 0); + SD(xZR, gback, gyoffset + 8); + } + break; + case 0x06: + INST_NAME("VPHSUBD Gx, Vx, Ex"); + nextop = F8; + GETEX(x1, 0, vex.l ? 
44 : 12); + GETGX(); + GETVX(); + GETGY(); + GETVY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 2; ++i) { + // GX->sd[i] = VX->sd[i*2+0]-VX->sd[i*2+1]; + LW(x3, vback, vxoffset + 4 * (i * 2 + 0)); + LW(x4, vback, vxoffset + 4 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SW(x3, gback, gdoffset + 4 * i); + } + if (MODREG && ed == vex.v) { + // GX->q[1] = GX->q[0]; + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); + } else { + for (int i = 0; i < 2; ++i) { + // GX->sd[4+i] = EX->sd[i*2+0]-EX->sd[i*2+1]; + LW(x3, wback, fixedaddress + 4 * (i * 2 + 0)); + LW(x4, wback, fixedaddress + 4 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SW(x3, gback, gdoffset + 4 * (2 + i)); + } + } + if (vex.l) { + GETEY(); + if (gd == ed) { + ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, x5, 0); + SD(x4, x5, 8); + wback = x5; + fixedaddress = 0; + } + for (int i = 0; i < 2; ++i) { + // GY->sd[i] = VY->sd[i*2+0]-VY->sd[i*2+1]; + LW(x3, vback, vyoffset + 4 * (i * 2 + 0)); + LW(x4, vback, vyoffset + 4 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SW(x3, gback, gyoffset + 4 * i); + } + if (MODREG && ed == vex.v) { + // GY->q[1] = GY->q[0]; + LD(x3, gback, gyoffset + 0); + SD(x3, gback, gyoffset + 8); + } else { + for (int i = 0; i < 4; ++i) { + // GY->sd[4+i] = EY->sd[i*2+0]-EY->sd[i*2+1]; + LW(x3, wback, fixedaddress + 4 * (i * 2 + 0)); + LW(x4, wback, fixedaddress + 4 * (i * 2 + 1)); + SUBW(x3, x3, x4); + SW(x3, gback, gyoffset + 4 * (2 + i)); + } + } + } else { + SD(xZR, gback, gyoffset + 0); + SD(xZR, gback, gyoffset + 8); + } + break; + default: + DEFAULT; + } + return addr; +} diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c index 05bad3e7..6dbe2d37 100644 --- 
a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c @@ -30,7 +30,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, uint8_t opcode = F8; uint8_t nextop, u8; uint8_t gd, ed, vd; - uint8_t wback, wb1, wb2, gback, vback, gyback; + uint8_t wback, wb1, wb2, gback, vback; uint8_t eb1, eb2, gb1, gb2; int32_t i32, i32_; int cacheupd = 0; @@ -66,8 +66,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(xZR, gback, gdoffset + 8); } GETGY(); - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); break; case 0x11: INST_NAME("VMOVSS Ex, [Vx,] Gx"); @@ -102,8 +102,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); break; case 0x5D: INST_NAME("VMINSS Gx, Vx, Ex"); @@ -132,8 +132,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); break; case 0x5F: INST_NAME("VMAXSS Gx, Vx, Ex"); @@ -162,8 +162,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, gyoffset + 8); break; case 0xC2: INST_NAME("VCMPSS Gx, Vx, Ex, Ib"); @@ -227,8 +227,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gyback, gyoffset); - SD(xZR, gyback, gyoffset + 8); + SD(xZR, gback, gyoffset); + SD(xZR, gback, 
gyoffset + 8); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 7fdcad6b..973f21fa 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -460,7 +460,6 @@ #define GETGY() \ gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ /* TODO: forget */ \ - gyback = xEmu; \ gyoffset = offsetof(x64emu_t, ymm[gd]) #define GETVX() \ @@ -468,6 +467,10 @@ vback = xEmu; \ vxoffset = offsetof(x64emu_t, xmm[vex.v]) +#define GETVY() \ + /* TODO: forget */ \ + vyoffset = offsetof(x64emu_t, ymm[vex.v]); + // Get Ex address in general register a, will purge SS or SD if it's reg and is loaded. May use x3. Use wback as load address! #define GETEX(a, D, I12) \ if (MODREG) { \ @@ -1286,10 +1289,11 @@ #define dynarec64_F20F_vector STEPNAME(dynarec64_F20F_vector) #define dynarec64_F30F_vector STEPNAME(dynarec64_F30F_vector) -#define dynarec64_AVX STEPNAME(dynarec64_AVX) -#define dynarec64_AVX_0F STEPNAME(dynarec64_AVX_0F) -#define dynarec64_AVX_66_0F STEPNAME(dynarec64_AVX_66_0F) -#define dynarec64_AVX_F3_0F STEPNAME(dynarec64_AVX_F3_0F) +#define dynarec64_AVX STEPNAME(dynarec64_AVX) +#define dynarec64_AVX_0F STEPNAME(dynarec64_AVX_0F) +#define dynarec64_AVX_66_0F STEPNAME(dynarec64_AVX_66_0F) +#define dynarec64_AVX_66_0F38 STEPNAME(dynarec64_AVX_66_0F38) +#define dynarec64_AVX_F3_0F STEPNAME(dynarec64_AVX_F3_0F) #define geted STEPNAME(geted) #define geted32 STEPNAME(geted32) @@ -1740,6 +1744,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); +uintptr_t 
dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog); #if STEP < 2 |