diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-08-19 19:17:02 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-19 13:17:02 +0200 |
| commit | a280884f0986f9658547d2bdd4b515e0beb28259 (patch) | |
| tree | 5413f976ad3f8bd8a69862f90f2db3cbb93b2595 /src | |
| parent | 7435006b7c65d0d2dc58446e3a84fc8d96f4cf0f (diff) | |
| download | box64-a280884f0986f9658547d2bdd4b515e0beb28259.tar.gz box64-a280884f0986f9658547d2bdd4b515e0beb28259.zip | |
[RV64_DYNAREC] Added more scalar avx opcodes (#2951)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f.c | 62 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 132 |
2 files changed, 193 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
index a360cfa9..7a530d9e 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
@@ -40,7 +40,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
     int s0;
     uint64_t tmp64u, u64;
     int64_t j64;
-    int64_t fixedaddress, gdoffset, vxoffset, gyoffset;
+    int64_t fixedaddress, gdoffset, vxoffset, gyoffset, vyoffset;
     int unscaled;
 
     rex_t rex = vex.rex;
@@ -67,6 +67,35 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SD(xZR, gback, gyoffset + 8);
             }
             break;
+        case 0x66:
+            INST_NAME("VPCMPGTD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 28 : 12);
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            for (int i = 0; i < 4; ++i) {
+                LW(x3, vback, vxoffset + i * 4);
+                LW(x4, wback, fixedaddress + i * 4);
+                SLT(x4, x4, x3); // x4 = (Ex < Vx), i.e. Vx > Ex
+                NEG(x3, x4);     // 1 -> all-ones lane mask, 0 -> 0
+                SW(x3, gback, gdoffset + i * 4);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 4; ++i) {
+                    LW(x3, vback, vyoffset + i * 4);
+                    LW(x4, wback, fixedaddress + i * 4);
+                    SLT(x4, x4, x3);
+                    NEG(x3, x4);
+                    SW(x3, gback, gyoffset + i * 4);
+                }
+            } else {
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
         case 0x6E:
             INST_NAME("VMOVD Gx, Ed");
             nextop = F8;
@@ -145,6 +174,37 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SD(xZR, wback, fixedaddress + 8);
             }
             break;
+        case 0xEF:
+            INST_NAME("VPXOR Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            LD(x3, vback, vxoffset + 0);
+            LD(x4, wback, fixedaddress + 0);
+            XOR(x3, x3, x4);
+            SD(x3, gback, gdoffset + 0);
+            LD(x3, vback, vxoffset + 8);
+            LD(x4, wback, fixedaddress + 8);
+            XOR(x3, x3, x4);
+            SD(x3, gback, gdoffset + 8);
+            if (vex.l) {
+                GETEY();
+                LD(x3, vback, vyoffset + 0);
+                LD(x4, wback, fixedaddress + 0);
+                XOR(x3, x3, x4);
+                SD(x3, gback, gyoffset + 0);
+                LD(x3, vback, vyoffset + 8);
+                LD(x4, wback, fixedaddress + 8);
+                XOR(x3, x3, x4);
+                SD(x3, gback, gyoffset + 8);
+            } else {
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
index 4e70bc8c..fc263e8f 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
@@ -448,6 +448,138 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 SD(xZR, gback, gyoffset + 8);
             }
             break;
+        case 0x08:
+            INST_NAME("VPSIGNB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 31 : 15); // NOTE(review): was x1, which the loops below overwrite as the sign temp while wback is still read; x2 matches VPCMPGTD/VPXOR - confirm GETEX's first arg is the address scratch
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            for (int i = 0; i < 16; ++i) {
+                LB(x3, vback, vxoffset + i);
+                LB(x4, wback, fixedaddress + i);
+                SLT(x1, xZR, x4);  // 1 when Ex lane > 0
+                SRAI(x5, x4, 63);  // -1 when Ex lane < 0
+                OR(x1, x1, x5);    // x1 = sign(Ex lane): -1, 0 or 1
+                MUL(x3, x1, x3);
+                SB(x3, gback, gdoffset + i);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 16; ++i) {
+                    LB(x3, vback, vyoffset + i);
+                    LB(x4, wback, fixedaddress + i);
+                    SLT(x1, xZR, x4);
+                    SRAI(x5, x4, 63);
+                    OR(x1, x1, x5);
+                    MUL(x3, x1, x3);
+                    SB(x3, gback, gyoffset + i);
+                }
+            } else {
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x09:
+            INST_NAME("VPSIGNW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 30 : 14); // NOTE(review): was x1, which is overwritten as the sign temp below - confirm GETEX scratch semantics
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            for (int i = 0; i < 8; ++i) {
+                LH(x3, vback, vxoffset + i * 2);
+                LH(x4, wback, fixedaddress + i * 2);
+                SLT(x1, xZR, x4);
+                SRAI(x5, x4, 63);
+                OR(x1, x1, x5);
+                MUL(x3, x1, x3);
+                SH(x3, gback, gdoffset + i * 2);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    LH(x3, vback, vyoffset + i * 2);
+                    LH(x4, wback, fixedaddress + i * 2);
+                    SLT(x1, xZR, x4);
+                    SRAI(x5, x4, 63);
+                    OR(x1, x1, x5);
+                    MUL(x3, x1, x3);
+                    SH(x3, gback, gyoffset + i * 2);
+                }
+            } else {
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x0A:
+            INST_NAME("VPSIGND Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 28 : 12); // NOTE(review): was x1, which is overwritten as the sign temp below - confirm GETEX scratch semantics
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            for (int i = 0; i < 4; ++i) {
+                LW(x3, vback, vxoffset + i * 4); // 32-bit lanes: word access (was LH, which read only the low half)
+                LW(x4, wback, fixedaddress + i * 4);
+                SLT(x1, xZR, x4);
+                SRAI(x5, x4, 63);
+                OR(x1, x1, x5);
+                MUL(x3, x1, x3);
+                SW(x3, gback, gdoffset + i * 4); // was SH, which left the upper 16 bits of the lane unwritten
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 4; ++i) {
+                    LW(x3, vback, vyoffset + i * 4);
+                    LW(x4, wback, fixedaddress + i * 4);
+                    SLT(x1, xZR, x4);
+                    SRAI(x5, x4, 63);
+                    OR(x1, x1, x5);
+                    MUL(x3, x1, x3);
+                    SW(x3, gback, gyoffset + i * 4);
+                }
+            } else {
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x0B:
+            INST_NAME("VPMULHRSW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 30 : 14);
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            for (int i = 0; i < 8; ++i) {
+                LH(x3, vback, vxoffset + i * 2); // first source is Vx: vback pairs with vxoffset (was gback)
+                LH(x4, wback, fixedaddress + i * 2);
+                MUL(x3, x3, x4);
+                SRAI(x3, x3, 14);
+                ADDI(x3, x3, 1); // round: ((a * b >> 14) + 1) >> 1
+                SRAI(x3, x3, 1);
+                SH(x3, gback, gdoffset + i * 2);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    LH(x3, vback, vyoffset + i * 2); // upper half of Vx: vback pairs with vyoffset (was gback)
+                    LH(x4, wback, fixedaddress + i * 2);
+                    MUL(x3, x3, x4);
+                    SRAI(x3, x3, 14);
+                    ADDI(x3, x3, 1);
+                    SRAI(x3, x3, 1);
+                    SH(x3, gback, gyoffset + i * 2);
+                }
+            } else {
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
         default:
             DEFAULT;
     }