author    phorcys <phorcys@126.com>  2025-08-01 17:49:29 +0800
committer GitHub <noreply@github.com>  2025-08-01 11:49:29 +0200
commit    7e9775de9406dbae5dd43e2206a6a50655e16b8f (patch)
tree      ecc43eec23e885128e269a97d4b338f0aa8ef81e /src
parent    ae0ac5a1d074fa76a1cd85947bb1688f91244966 (diff)
[LA64_DYNAREC] Add la64 avx cvt ops, part 3. (#2869)
Double <=> integer conversions.
Half float <=> single float conversions.
VCVT{DQ2PD, PD2DQ, TPD2DQ}
VCVT{SI2SD, SD2SI, TSD2SI}
VCVT{PH2PS, PS2PH}
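
For reference, the per-element x86 semantics the truncating handlers below must reproduce can be sketched in plain C (illustrative only, not box64 code; the function name is made up):

    #include <stdint.h>
    #include <math.h>

    /* Sketch of what VCVTTSD2SI / VCVTTPD2DQ do per element:
       out-of-range inputs and NaN yield the "integer indefinite"
       value 0x80000000; everything else truncates toward zero. */
    static int32_t cvtt_double_to_i32_ref(double d)
    {
        if (isnan(d) || d >= 2147483648.0 || d < -2147483648.0)
            return INT32_MIN; /* 0x80000000, the integer indefinite */
        return (int32_t)d;    /* C casts truncate toward zero */
    }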
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f.c   |  41
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f38.c |  14
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f3a.c |  32
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_f2_0f.c   | 134
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_f3_0f.c   |  16
5 files changed, 237 insertions, 0 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 7850cbf3..eac332ff 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -1099,6 +1099,47 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMUHxy(H, v0, v1, v2);
             break;
+        case 0xE6:
+            INST_NAME("VCVTTPD2DQ Gx, Ex");
+            nextop = F8;
+            GETEYxy(v1, 0, 0);
+            GETGYx_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            if (vex.l) {
+                XVXOR_V(d0, d0, d0);
+                XVFTINTRZ_W_D(d1, d0, v1);  // d1 = [lo0, lo1, --, --, hi0, hi1, --, --]
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    /*
+                        VCVTTPD2DQ always truncates (RZ) and LA64 saturates negative overflow
+                        to 0x80000000 already, so one xvfcmp.cule against 2^31 catches NaN and positive overflow
+                    */
+                    LU52I_D(x5, xZR, 0x41e);    // 0x41e0000000000000 = 2^31 as a double
+                    XVREPLGR2VR_D(q1, x5);
+                    XVFCMP_D(d0, q1, v1, cULE); // NaN / positive-overflow mask
+                    XVSRLNI_W_D(d0, d0, 0);     // narrow the 64-bit masks to 32-bit lanes
+                    XVBITSEL_V(d1, d1, q0, d0); // replace flagged lanes with 0x80000000
+                }
+                XVPERMI_D(v0, d1, 0b11011000); // pack [lo0 lo1 | hi0 hi1] into the low half
+            } else {
+                VFTINTRZ_W_D(d0, v1, v1);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    LU52I_D(x5, xZR, 0x41e);    // 2^31 as a double
+                    XVREPLGR2VR_D(q1, x5);
+                    XVFCMP_D(q1, q1, v1, cULE); // NaN / positive-overflow mask
+                    VSHUF4I_W(q1, q1, 0b11011000);
+                    VBITSEL_V(d0, d0, q0, q1);
+                }
+                XVPICKVE_D(v0, d0, 0);
+                YMM_UNMARK_UPPER_ZERO(v0);
+            }
+            break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex, Gx");
             nextop = F8;
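
A note on the cULE trick in the VCVTTPD2DQ hunk above: 0x41e0000000000000 is the IEEE-754 encoding of 2^31 as a double, so a single unordered-or-less-or-equal compare against it flags exactly the lanes that need the 0x80000000 fixup. A minimal C sketch of the predicate (assuming, as the code does, that LA64 truncation already saturates negative overflow to 0x80000000):

    #include <stdint.h>
    #include <string.h>
    #include <math.h>

    /* Sketch of the mask computed by XVFCMP_D(..., cULE): set for lanes
       that are NaN or >= 2^31. Negative overflow is assumed handled by
       hardware saturation, which already matches the x86 result. */
    static int needs_indefinite_fixup(double d)
    {
        const uint64_t bits = 0x41e0000000000000ULL; /* 2^31 as a double */
        double limit;
        memcpy(&limit, &bits, sizeof limit);
        return isnan(d) || limit <= d; /* cULE: unordered or less-or-equal */
    }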
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 5e7f064d..037d49a6 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -256,6 +256,20 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             }
             MARK2;
             break;
+        case 0x13:
+            INST_NAME("VCVTPH2PS Gx, Ex");
+            nextop = F8;
+            GETEYSD(v1, 0, 0);
+            GETGYxy_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            if (vex.l) {
+                XVFCVTH_S_H(d0, v1);  // widen halves 4..7 to singles
+                XVFCVTL_S_H(v0, v1);  // widen halves 0..3 to singles
+                XVPERMI_Q(v0, d0, XVPERMI_IMM_4_0(0, 2)); // combine into the full ymm
+            } else {
+                VFCVTL_S_H(v0, v1);
+            }
+            break;
         case 0x16:
             INST_NAME("VPERMPS Gx, Vx, Ex");
             nextop = F8;
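
The VCVTPH2PS hunk above leans on the hardware half-to-single converts; as a scalar reference, the per-lane widening looks like this in C (a sketch of IEEE binary16 -> binary32, not the LA64 encoding):

    #include <stdint.h>
    #include <string.h>

    /* Scalar reference for one VFCVTL_S_H lane: widen binary16 to binary32. */
    static float half_to_float_ref(uint16_t h)
    {
        uint32_t sign = (uint32_t)(h >> 15) << 31;
        uint32_t exp  = (h >> 10) & 0x1f;
        uint32_t man  = h & 0x3ff;
        uint32_t bits;

        if (exp == 0x1f) {                    /* Inf/NaN: max exponent */
            bits = sign | 0x7f800000 | (man << 13);
        } else if (exp == 0) {                /* zero or subnormal */
            if (man == 0) {
                bits = sign;
            } else {                          /* normalize the subnormal */
                exp = 127 - 15 + 1;
                while (!(man & 0x400)) { man <<= 1; exp--; }
                bits = sign | (exp << 23) | ((man & 0x3ff) << 13);
            }
        } else {                              /* normal: rebias 15 -> 127 */
            bits = sign | ((exp + 127 - 15) << 23) | (man << 13);
        }

        float f;
        memcpy(&f, &bits, sizeof f);
        return f;
    }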
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index 3493c3b9..ccfe759c 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -435,6 +435,38 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x1D:
+            INST_NAME("VCVTPS2PH Ex, Gx, Ib");
+            nextop = F8;
+            GETGYxy(v0, 0);
+            if (vex.l) {
+                GETEYx(v1, 1, 1);
+            } else {
+                GETEYSD(v1, 1, 1);
+            }
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            if (u8 & 4) {
+                u8 = sse_setround(dyn, ninst, x1, x2); // Ib bit 2 set: use the MXCSR mode
+            } else {
+                MOVFCSR2GR(x4, FCSR3);                 // save rounding control; Ib[1:0] is the mode
+                ORI(x5, xZR, round_round[u8 & 3]);
+                SLLI_D(x5, x5, 8);
+                MOVGR2FCSR(FCSR3, x5);
+                u8 = x4;
+            }
+            if (vex.l) {
+                XVXOR_V(d0, d0, d0);
+                XVFCVT_H_S(v1, d0, v0);        // narrow 8 singles to 8 halves
+                XVPERMI_D(v1, v1, 0b11011000); // compact the per-lane results
+                PUTEYx(v1);
+            } else {
+                XVXOR_V(d0, d0, d0);
+                VFCVT_H_S(v1, d0, v0);
+                PUTEYSD(v1);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0x21:
             INST_NAME("VINSERTPS Gx, Vx, Ex, Ib");
             nextop = F8;
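
The imm8 handling in the VCVTPS2PH hunk follows the x86 rule: bit 2 of Ib selects the current MXCSR rounding mode, otherwise Ib[1:0] names the mode directly. In C terms (illustrative helper, not box64 API):

    #include <stdint.h>

    /* x86 rounding-control encoding: 0=nearest, 1=down, 2=up, 3=toward zero. */
    enum x86_rc { RC_NEAREST = 0, RC_DOWN = 1, RC_UP = 2, RC_TOZERO = 3 };

    static enum x86_rc vcvtps2ph_rc(uint8_t ib, enum x86_rc mxcsr_rc)
    {
        return (ib & 4) ? mxcsr_rc               /* bit 2 set: use MXCSR */
                        : (enum x86_rc)(ib & 3); /* else Ib[1:0] is the mode */
    }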
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 0a713483..28673d0d 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -116,6 +116,90 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VREPLVE_D(q0, q1, 0);
             }
             break;
+        case 0x2A:
+            INST_NAME("VCVTSI2SD Gx, Vx, Ed");
+            nextop = F8;
+            GETED(0);
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                u8 = sse_setround(dyn, ninst, x2, x3);
+            }
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                MOVGR2FR_D(d1, ed);
+                FFINT_D_L(d1, d1);
+            } else {
+                MOVGR2FR_W(d1, ed);
+                FFINT_D_W(d1, d1);
+            }
+            if (!BOX64ENV(dynarec_fastround)) {
+                x87_restoreround(dyn, ninst, u8);
+            }
+            if (v0 != v1) VOR_V(v0, v1, v1); // copy Vx into Gx
+            VEXTRINS_D(v0, d1, 0);           // insert the converted double into lane 0
+            break;
+        case 0x2C:
+            INST_NAME("VCVTTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEYSD(q0, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                FTINTRZ_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINTRZ_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
+            }
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVFCSR2GR(x5, FCSR2); // read back the cause bits to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O)); // invalid / overflow
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5); // no fault: keep the converted value
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("VCVTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEYSD(q0, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            u8 = sse_setround(dyn, ninst, x2, x3);
+            if (rex.w) {
+                FTINT_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINT_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            if (!BOX64ENV(dynarec_fastround)) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
         case 0x51:
             INST_NAME("VSQRTSD Gx, Vx, Ex");
             nextop = F8;
@@ -361,6 +445,56 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0xE6:
+            INST_NAME("VCVTPD2DQ Gx, Ex");
+            nextop = F8;
+            GETEYxy(v1, 0, 0);
+            GETGYx_empty(v0);
+            u8 = sse_setround(dyn, ninst, x1, x2);
+            d0 = fpu_get_scratch(dyn);
+            if (vex.l) {
+                XVXOR_V(d0, d0, d0);
+                XVFTINT_W_D(v0, d0, v1);       // v0 [lo0, lo1, --, --, hi0, hi1, --, -- ]
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    q2 = fpu_get_scratch(dyn);
+                    d1 = fpu_get_scratch(dyn);
+                    XVFTINT_L_D(q2, v1);        // also round to int64, to detect int32 overflow
+                    XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                    MOV32w(x5, 0x7FFFFFFF);     // INT32_MAX
+                    BSTRPICK_D(x5, x5, 31, 0);
+                    XVREPLGR2VR_D(q1, x5);
+                    XVFCMP_D(d0, v1, v1, cUN);  // NaN mask
+                    XVSLT_D(d1, q1, q2);        // positive-overflow mask (rounded value > INT32_MAX)
+                    XVOR_V(d0, d1, d0);
+                    XVSRLNI_W_D(d0, d0, 0); // [A,B,C,D] => [a,b,--,--,c,d,--,--]
+                    XVBITSEL_V(v0, v0, q0, d0);
+                }
+                XVPERMI_D(v0, v0, 0b11011000); // pack [lo0 lo1 | hi0 hi1] into the low half
+            } else {
+                VFTINT_W_D(d0, v1, v1);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    q0 = fpu_get_scratch(dyn);
+                    q1 = fpu_get_scratch(dyn);
+                    q2 = fpu_get_scratch(dyn);
+                    d1 = fpu_get_scratch(dyn);
+                    VFTINT_L_D(d1, v1);         // also round to int64, to detect int32 overflow
+                    VLDI(q0, 0b1001110000000);  // broadcast 0x80000000 to all
+                    MOV32w(x5, 0x7FFFFFFF);     // INT32_MAX
+                    BSTRPICK_D(x5, x5, 31, 0);
+                    VREPLGR2VR_D(q1, x5);
+                    VSLT_D(q1, q1, d1);         // positive-overflow mask
+                    VFCMP_D(q2, v1, v1, cUN);   // NaN mask
+                    VOR_V(q1, q1, q2);
+                    VSHUF4I_W(q1, q1, 0b11011000);
+                    VBITSEL_V(d0, d0, q0, q1);
+                }
+                XVPICKVE_D(v0, d0, 0);
+                YMM_UNMARK_UPPER_ZERO(v0);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0xF0:
             INST_NAME("VLDDQU Gx, Ex");
             nextop = F8;
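
Unlike the truncating form, VCVTPD2DQ rounds with the current mode, so a value just below 2^31 can round up to 2^31; that is why the hunk above also converts to int64 (XVFTINT_L_D) and compares the result against INT32_MAX. A scalar sketch of the check (llrint stands in for the hardware convert; illustrative only):

    #include <stdint.h>
    #include <math.h>

    /* A lane needs the 0x80000000 fixup when it is NaN or when the
       rounded value exceeds INT32_MAX; negative overflow is assumed
       to saturate to 0x80000000 in hardware, matching x86. */
    static int needs_positive_fixup(double d)
    {
        if (isnan(d))
            return 1;
        if (d >= 9.3e18)  /* beyond int64: certainly overflows int32 too */
            return 1;
        if (d <= -9.3e18) /* negative overflow saturates to 0x80000000 anyway */
            return 0;
        return llrint(d) > (long long)INT32_MAX; /* rounds per current FP mode */
    }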
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index eb927210..1ff336a4 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -476,6 +476,22 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             XVINSVE0_W(v0, q0, 0);
             YMM_UNMARK_UPPER_ZERO(v0);
             break;
+        case 0xE6:
+            INST_NAME("CVTDQ2PD Gx, Ex");
+            nextop = F8;
+            d0 = fpu_get_scratch(dyn);
+            if (vex.l) {
+                GETEYx(v1, 0, 0);
+                GETGYy_empty(v0);
+                XVFFINTL_D_W(v0, v1); // words 0..1 of each lane -> doubles
+                XVFFINTH_D_W(d0, v1); // words 2..3 -> doubles
+                XVPERMI_Q(v0, d0, XVPERMI_IMM_4_0(0, 2));
+            } else {
+                GETEYSD(v1, 0, 0);
+                GETGYx_empty(v0);
+                VFFINTL_D_W(v0, v1);
+            }
+            break;
         default:
             DEFAULT;
     }
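
Finally, the VCVTDQ2PD case needs no rounding or NaN fixup at all: a double's 52-bit mantissa represents every int32 exactly, so the plain widening convert is already bit-correct. In C terms (trivial sketch):

    #include <stdint.h>

    /* int32 -> double is always exact, which is why this case has no
       fastround path or indefinite-value handling. */
    static double cvtdq2pd_lane(int32_t i)
    {
        return (double)i; /* exact for every int32, rounding mode irrelevant */
    }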