| author | phorcys <phorcys@126.com> | 2025-08-01 17:49:29 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-01 11:49:29 +0200 |
| commit | 7e9775de9406dbae5dd43e2206a6a50655e16b8f | |
| tree | ecc43eec23e885128e269a97d4b338f0aa8ef81e /src | |
| parent | ae0ac5a1d074fa76a1cd85947bb1688f91244966 | |
[LA64_DYNAREC] Add la64 avx cvt ops, part 3. (#2869)
Double <=> integer conversions.
Half-float <=> integer conversions.
VCVT{DQ2PD, PD2DQ, TPD2DQ}
VCVT{SI2SD, SD2SI, TSD2SI}
VCVT{PH2PS, PS2PH}

Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 41 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 14 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 32 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f2_0f.c | 134 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f3_0f.c | 16 |
5 files changed, 237 insertions, 0 deletions
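
For context (this note is not part of the commit): the non-fastround paths below reproduce the x86 rule that a truncating double-to-int32 conversion returns the "integer indefinite" value 0x80000000 on NaN or overflow, which is why the VCVTTPD2DQ case compares the source against 0x41e0000000000000, the IEEE-754 double encoding of 2^31. A minimal reference sketch of that scalar behaviour, with ad-hoc names rather than box64 APIs:

```c
#include <stdint.h>
#include <math.h>

/* Reference behaviour of a truncating double -> int32 conversion on x86
 * (CVTTSD2SI / the lanes of VCVTTPD2DQ): round toward zero, and return the
 * "integer indefinite" 0x80000000 when the input is NaN or the truncated
 * value does not fit in int32. Illustration only, not box64 code. */
static int32_t cvttsd2si32_ref(double x)
{
    if (isnan(x))
        return INT32_MIN;                  /* 0x80000000, integer indefinite */
    double t = trunc(x);                   /* round toward zero */
    if (t < -2147483648.0 || t > 2147483647.0)
        return INT32_MIN;                  /* out-of-range also gives 0x80000000 */
    return (int32_t)t;
}
```

The LASX sequences reach the same result by building a NaN/overflow mask with xvfcmp and selecting the 0x80000000 pattern through xvbitsel.v.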
```diff
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 7850cbf3..eac332ff 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -1099,6 +1099,47 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMUHxy(H, v0, v1, v2);
             break;
+        case 0xE6:
+            INST_NAME("VCVTTPD2DQ Gx, Ex");
+            nextop = F8;
+            GETEYxy(v1, 0, 0);
+            GETGYx_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            if(vex.l){
+                XVXOR_V(d0, d0, d0);
+                XVFTINTRZ_W_D(d1, d0, v1); // v0 [lo0, lo1, --, --, hi0, hi1, --, -- ]
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    /*
+                        VCVTTPD2DQ has default rounding mode RZ
+                        so we could combine +-NAN +overflow to xvfcmp.cule 0x41e0000000000000
+                    */
+                    LU52I_D(x5, xZR, 0x41e);
+                    XVREPLGR2VR_D(q1, x5);
+                    XVFCMP_D(d0, q1, v1, cULE); // get Nan mask
+                    XVSRLNI_W_D(d0, d0, 0);
+                    XVBITSEL_V(v0, d1, q0, d0);
+                }
+                XVPERMI_D(v0, v0, 0b11011000);
+            }else{
+                VFTINTRZ_W_D(d0, v1, v1);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    LU52I_D(x5, xZR, 0x41e);
+                    XVREPLGR2VR_D(q1, x5);
+                    XVFCMP_D(q1, q1, v1, cULE); // get Nan mask
+                    VSHUF4I_W(q1, q1, 0b11011000);
+                    VBITSEL_V(d0, d0, q0, q1);
+                }
+                XVPICKVE_D(v0, d0, 0);
+                YMM_UNMARK_UPPER_ZERO(v0);
+            }
+            break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex, Gx");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 5e7f064d..037d49a6 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -256,6 +256,20 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             }
             MARK2;
             break;
+        case 0x13:
+            INST_NAME("VCVTPH2PS Gx, Ex");
+            nextop = F8;
+            GETEYSD(v1, 0, 0);
+            GETGYxy_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            if(vex.l) {
+                XVFCVTH_S_H(d0, v1);
+                XVFCVTL_S_H(v0, v1);
+                XVPERMI_Q(v0, d0, XVPERMI_IMM_4_0(0, 2));
+            } else {
+                VFCVTL_S_H(v0, v1);
+            }
+            break;
         case 0x16:
             INST_NAME("VPERMPS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index 3493c3b9..ccfe759c 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -435,6 +435,38 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x1D:
+            INST_NAME("VCVTPS2PH Ex, Gx, Ib");
+            nextop = F8;
+            GETGYxy(v0, 0);
+            if(vex.l) {
+                GETEYx(v1, 1, 1);
+            } else {
+                GETEYSD(v1, 1, 1);
+            }
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            if (u8 & 4) {
+                u8 = sse_setround(dyn, ninst, x1, x2);
+            } else {
+                MOVFCSR2GR(x4, FCSR3);
+                ORI(x5, x5, round_round[u8&3]);
+                SLLI_D(x5, x5, 8);
+                MOVGR2FCSR(FCSR3, x5);
+                u8 = x4;
+            }
+            if(vex.l){
+                XVXOR_V(d0, d0, d0);
+                XVFCVT_H_S(v1, d0, v0);
+                XVPERMI_D(v1, v1, 0b11011000);
+                PUTEYx(v1);
+            } else {
+                XVXOR_V(d0, d0, d0);
+                VFCVT_H_S(v1, d0, v0);
+                PUTEYSD(v1);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0x21:
             INST_NAME("VINSERTPS Gx, Vx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 0a713483..28673d0d 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -116,6 +116,90 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VREPLVE_D(q0, q1, 0);
             }
             break;
+        case 0x2A:
+            INST_NAME("VCVTSI2SD Gx, Vx, Ed");
+            nextop = F8;
+            GETED(0);
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            d1 = fpu_get_scratch(dyn);
+            if (!BOX64ENV(dynarec_fastround)) {
+                u8 = sse_setround(dyn, ninst, x2, x3);
+            }
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                MOVGR2FR_D(d1, ed);
+                FFINT_D_L(d1, d1);
+            } else {
+                MOVGR2FR_W(d1, ed);
+                FFINT_D_W(d1, d1);
+            }
+            if (!BOX64ENV(dynarec_fastround)) {
+                x87_restoreround(dyn, ninst, u8);
+            }
+            if(v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_D(v0, d1, 0);
+            break;
+        case 0x2C:
+            INST_NAME("VCVTTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEYSD(q0, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                FTINTRZ_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINTRZ_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
+            }
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("VCVTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEYSD(q0, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            u8 = sse_setround(dyn, ninst, x2, x3);
+            if (rex.w) {
+                FTINT_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINT_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
         case 0x51:
             INST_NAME("VSQRTSD Gx, Vx, Ex");
             nextop = F8;
@@ -361,6 +445,56 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0xE6:
+            INST_NAME("VCVTPD2DQ Gx, Ex");
+            nextop = F8;
+            GETEYxy(v1, 0, 0);
+            GETGYx_empty(v0);
+            u8 = sse_setround(dyn, ninst, x1, x2);
+            d0 = fpu_get_scratch(dyn);
+            if(vex.l){
+                XVXOR_V(d0, d0, d0);
+                XVFTINT_W_D(v0, d0, v1); // v0 [lo0, lo1, --, --, hi0, hi1, --, -- ]
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    q2 = fpu_get_scratch(dyn);
+                    d1 = fpu_get_scratch(dyn);
+                    XVFTINT_L_D(q2, v1);
+                    XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    MOV32w(x5, 0x7FFFFFFF);
+                    BSTRPICK_D(x5, x5, 31, 0);
+                    XVREPLGR2VR_D(q1, x5);
+                    XVFCMP_D(d0, v1, v1, cUN); // get Nan mask
+                    XVSLT_D(d1, q1, q2); // get +inf mask
+                    XVOR_V(d0, d1, d0);
+                    XVSRLNI_W_D(d0, d0, 0); // [A,B,C,D] => [a,b,--,--,c,d,--,--]
+                    XVBITSEL_V(v0, v0, q0, d0);
+                }
+                XVPERMI_D(v0, v0, 0b11011000);
+            }else{
+                VFTINT_W_D(d0, v1, v1);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    q2 = fpu_get_scratch(dyn);
+                    d1 = fpu_get_scratch(dyn);
+                    VFTINT_L_D(d1, v1);
+                    VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    MOV32w(x5, 0x7FFFFFFF);
+                    BSTRPICK_D(x5, x5, 31, 0);
+                    VREPLGR2VR_D(q1, x5);
+                    VSLT_D(q1, q1, d1); // get +inf mask
+                    VFCMP_D(q2, v1, v1, cUN); // get Nan mask
+                    VOR_V(q1, q1, q2);
+                    VSHUF4I_W(q1, q1, 0b11011000);
+                    VBITSEL_V(d0, d0, q0, q1);
+                }
+                XVPICKVE_D(v0, d0, 0);
+                YMM_UNMARK_UPPER_ZERO(v0);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0xF0:
             INST_NAME("VLDDQU Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index eb927210..1ff336a4 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -476,6 +476,22 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             XVINSVE0_W(v0, q0, 0);
             YMM_UNMARK_UPPER_ZERO(v0);
             break;
+        case 0xE6:
+            INST_NAME("CVTDQ2PD Gx, Ex");
+            nextop = F8;
+            d0 = fpu_get_scratch(dyn);
+            if(vex.l){
+                GETEYx(v1, 0, 0);
+                GETGYy_empty(v0);
+                XVFFINTL_D_W(v0, v1);
+                XVFFINTH_D_W(d0, v1);
+                XVPERMI_Q(v0, d0, XVPERMI_IMM_4_0(0, 2));
+            }else{
+                GETEYSD(v1, 0, 0);
+                GETGYx_empty(v0);
+                VFFINTL_D_W(v0, v1);
+            }
+            break;
         default:
             DEFAULT;
     }
```
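
A side note on the VCVTPS2PH path (case 0x1D above): the immediate byte carries the rounding control, and the `if (u8 & 4)` split mirrors the architectural rule that imm8 bit 2 selects the current MXCSR rounding while imm8[1:0] otherwise gives an explicit mode. A small illustrative helper (hypothetical, not box64 code):

```c
#include <stdint.h>

/* Rounding modes as encoded in VCVTPS2PH's imm8[1:0] (and in MXCSR.RC). */
enum rc { RC_NEAREST = 0, RC_DOWN = 1, RC_UP = 2, RC_ZERO = 3 };

/* Pick the rounding mode VCVTPS2PH should use:
 * imm8 bit 2 set  -> use the current MXCSR rounding control;
 * imm8 bit 2 clear -> imm8[1:0] is the explicit rounding mode. */
static enum rc vcvtps2ph_rc(uint8_t imm8, enum rc mxcsr_rc)
{
    return (imm8 & 0x04) ? mxcsr_rc : (enum rc)(imm8 & 0x03);
}
```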