diff options
| author | wannacu <76616478+wannacu@users.noreply.github.com> | 2025-02-17 20:42:43 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-17 13:42:43 +0100 |
| commit | f107321d0678f94893f22f83aee582d50bfd90a9 (patch) | |
| tree | b9cfb27d4c5223a65901f311a46a1245b024ebd0 /src | |
| parent | 64313d5aa6a0bafe84ddda61fe7cd541f81b42e7 (diff) | |
| download | box64-f107321d0678f94893f22f83aee582d50bfd90a9.tar.gz box64-f107321d0678f94893f22f83aee582d50bfd90a9.zip | |
[ARM64_DYNAREC] Added more AVX opcodes (#2372)
* [ARM64_DYNAREC] Fixed AVX.F2.0F 12 opcode
* [ARM64_DYNAREC] Added more AVX opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 13 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67_avx.c | 14 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 60 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 2 |
4 files changed, 84 insertions, 5 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index ec13f6aa..a032ce15 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -2816,7 +2816,18 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEM(d1, 0); URHADD_16(d0, d0, d1); break; - + case 0xE4: + INST_NAME("PMULHUW Gm,Em"); + nextop = F8; + GETGM(v0); + GETEM(v1, 0); + q0 = fpu_get_scratch(dyn, ninst); + q1 = fpu_get_scratch(dyn, ninst); + VUMULL_16(q0, v0, v1); + VUMULL2_16(q1, v0, v1); + UQSHRN_16(v0, q0, 16); + UQSHRN2_16(v0, q1, 16); + break; case 0xE5: INST_NAME("PMULHW Gm,Em"); nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_67_avx.c b/src/dynarec/arm64/dynarec_arm64_67_avx.c index 78ad685f..def36fb3 100644 --- a/src/dynarec/arm64/dynarec_arm64_67_avx.c +++ b/src/dynarec/arm64/dynarec_arm64_67_avx.c @@ -126,6 +126,20 @@ uintptr_t dynarec64_67_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int } break; + case 0x6E: + INST_NAME("VMOVD Gx, Ed"); + nextop = F8; + GETGX_empty(v0); + GETED(0); + VEORQ(v0, v0, v0); // RAZ vector + if(rex.w) { + FMOVDx(v0, ed); + } else { + FMOVSw(v0, ed); + } + YMM0(gd); + break; + default: DEFAULT; } diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index 4f2b1c28..541855ff 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -99,7 +99,18 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } if(!vex.l) YMM0(gd); break; - + case 0x03: + INST_NAME("VPHADDSW Gx, Vx, Ex"); + nextop = F8; + v0 = fpu_get_scratch(dyn, ninst); + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); } + VUZP2Q_16(v0, q2, q1); + VUZP1Q_16(q0, q2, q1); + SQADDQ_16(q0, q0, v0); + } + if(!vex.l) YMM0(gd); + break; case 0x04: INST_NAME("PMADDUBSW Gx, Vx, Ex"); nextop 
= F8; @@ -126,7 +137,42 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } if(!vex.l) YMM0(gd); break; - + case 0x05: + INST_NAME("VPHSUBW Gx, Vx, Ex"); + nextop = F8; + v0 = fpu_get_scratch(dyn, ninst); + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); } + VUZP2Q_16(v0, q2, q1); + VUZP1Q_16(q0, q2, q1); + VSUBQ_16(q0, q0, v0); + } + if(!vex.l) YMM0(gd); + break; + case 0x06: + INST_NAME("VPHSUBD Gx, Vx, Ex"); + nextop = F8; + v0 = fpu_get_scratch(dyn, ninst); + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); } + VUZP2Q_32(v0, q2, q1); + VUZP1Q_32(q0, q2, q1); + VSUBQ_32(q0, q0, v0); + } + if(!vex.l) YMM0(gd); + break; + case 0x7: + INST_NAME("VPHSUBSW Gx, Vx, Ex"); + nextop = F8; + v0 = fpu_get_scratch(dyn, ninst); + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_VXEX(q0, q2, q1, 0); } else { GETGY_empty_VYEY(q0, q2, q1); } + VUZP2Q_16(v0, q2, q1); + VUZP1Q_16(q0, q2, q1); + SQSUBQ_16(q0, q0, v0); + } + if(!vex.l) YMM0(gd); + break; case 0x08: INST_NAME("VPSIGNB Gx, Vx, Ex"); nextop = F8; @@ -599,7 +645,15 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } if(!vex.l) YMM0(gd); break; - + case 0x2A: + INST_NAME("VMOVNTDQA Gx, Ex"); + nextop = F8; + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); } + VMOVQ(v0, v1); + } + if(!vex.l) YMM0(gd); + break; case 0x2B: INST_NAME("VPACKUSDW Gx, Ex, Vx"); nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index afc1ed55..1e204a13 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -113,7 +113,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, if(vex.l) { GETGY_empty(v0, -1, -1, -1); ADDx_U12(x3, ed, 16); - 
VLDQ1R_64(v0, ed); + VLDQ1R_64(v0, x3); } } if(!vex.l) YMM0(gd); |