diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-08 11:53:35 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-08 11:53:35 +0200 |
| commit | 97530e4095af2786fac3258952c71ab3b98ce4ad (patch) | |
| tree | f91983791e05b32a71d6c9a94eeb95f7e2506d11 /src | |
| parent | 4c285d4d8a13a13b7fd618884eb983b332740fa7 (diff) | |
| download | box64-97530e4095af2786fac3258952c71ab3b98ce4ad.tar.gz box64-97530e4095af2786fac3258952c71ab3b98ce4ad.zip | |
[ARM64_DYNAREC] Added AVX.66.0F E6, AVX.66.0F38 98 and fixed AVX.F2.0F E6 opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 46 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 12 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 9 |
3 files changed, 61 insertions, 6 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index c9243180..634dd91c 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -1457,7 +1457,51 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             if(!vex.l) YMM0(gd);
             break;
-
+        case 0xE6:
+            INST_NAME("VCVTTPD2DQ Gx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) {
+                    GETEX_Y(v1, 0, 0);
+                    GETGX_empty(v0);
+                } else {
+                    if(box64_dynarec_fastround)
+                        d0 = fpu_get_scratch(dyn, ninst);
+                    GETEY(v1);
+                }
+                if(box64_dynarec_fastround) {
+                    VFCVTZSQD(l?d0:v0, v1); // convert double -> int64
+                    if(!l)
+                        SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part
+                    else
+                        SQXTN2_32(v0, d0); // convert int64 -> int32 with saturation in higher part
+                } else {
+                    if(!l) {
+                        MRS_fpsr(x5);
+                        BFCw(x5, FPSR_IOC, 1); // reset IOC bit
+                        MSR_fpsr(x5);
+                        ORRw_mask(x4, xZR, 1, 0); //0x80000000
+                        d0 = fpu_get_scratch(dyn, ninst);
+                    }
+                    for(int i=0; i<2; ++i) {
+                        BFCw(x5, FPSR_IOC, 1); // reset IOC bit
+                        MSR_fpsr(x5);
+                        if(i) {
+                            VMOVeD(d0, 0, v1, i);
+                            FCVTZSwD(x1, d0);
+                        } else {
+                            FCVTZSwD(x1, v1);
+                        }
+                        MRS_fpsr(x5); // get back FPSR to check the IOC bit
+                        TSTw_mask(x5, 0, 0); // mask = 1 = FPSR_IOC
+                        CSELx(x1, x1, x4, cEQ);
+                        VMOVQSfrom(v0, i+l*2, x1);
+                    }
+                    if(!vex.l && !l) VMOVQDfrom(v0, 1, xZR);
+                }
+            }
+            YMM0(gd);
+            break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex,Gx");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index e4ebd5b2..59573d53 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -840,6 +840,18 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) {YMM0(gd); YMM0(vex.v);}
             break;
 
+        case 0x98:
+            INST_NAME("VFMADD132PS/D Gx, Vx, Ex");
+            nextop = F8;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_VXEX(v0, v2, v1, 0); } else { GETGY_VYEY(v0, v2, v1); }
+                if(!l && v0!=v2) q0 = fpu_get_scratch(dyn, ninst);
+                if(v0!=v2) VMOVQ(q0, v2); else q0 = v2;
+                if(rex.w) VFMLAQD(q0, v0, v1); else VFMLAQS(q0, v0, v1);
+                VMOVQ(v0, q0);
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         case 0x99:
             INST_NAME("VFMADD132SS/D Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index 816b695c..d51d6d2b 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -356,14 +356,14 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(!l) {
                     GETEX_Y(v1, 0, 0);
                     GETGX_empty(v0);
-                } else {
-                    if(box64_dynarec_fastround)
+                    if(!box64_dynarec_fastround || vex.l)
                         d0 = fpu_get_scratch(dyn, ninst);
+                } else {
                     GETEY(v1);
                 }
                 if(box64_dynarec_fastround) {
-                    VFRINTIDQ(v0, v1);
-                    VFCVTNSQD(v0, v0); // convert double -> int64
+                    VFRINTIDQ(l?d0:v0, v1);
+                    VFCVTNSQD(l?d0:v0, l?d0:v0); // convert double -> int64
                     if(!l)
                         SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part
                     else
@@ -374,7 +374,6 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                         BFCw(x5, FPSR_IOC, 1); // reset IOC bit
                         MSR_fpsr(x5);
                         ORRw_mask(x4, xZR, 1, 0); //0x80000000
-                        d0 = fpu_get_scratch(dyn, ninst);
                     }
                     for(int i=0; i<2; ++i) {
                         BFCw(x5, FPSR_IOC, 1); // reset IOC bit