about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-08-29 20:34:00 +0800
committer GitHub <noreply@github.com> 2025-08-29 14:34:00 +0200
commit 67d6501f54cdb24a1f46249fa44b3309eec3a688 (patch)
tree c4996f2b62f06adaca5b4a7b1d2dee9867504c9e /src
parent d0ce4ef9138f7afcf844cd8f0f1b9258891642e0 (diff)
download box64-67d6501f54cdb24a1f46249fa44b3309eec3a688.tar.gz
box64-67d6501f54cdb24a1f46249fa44b3309eec3a688.zip
[LA64_DYNAREC] Fixed AVX VCVT[T]PS2DQ fastround path (#2979)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f.c 12
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f2_0f.c 4
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f3_0f.c 14
3 files changed, 19 insertions, 11 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 660ac6a8..6cc45b57 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -384,28 +384,32 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_EY_xy(v0, v1, 0);
             u8 = sse_setround(dyn, ninst, x6, x4);
             if(vex.l){
-                d1 = fpu_get_scratch(dyn);
-                XVFTINT_W_S(d1, v1);
                 if (!BOX64ENV(dynarec_fastround)) {
+                    d1 = fpu_get_scratch(dyn);
                     q0 = fpu_get_scratch(dyn);
                     q1 = fpu_get_scratch(dyn); // mask
                     d0 = fpu_get_scratch(dyn);
+                    XVFTINT_W_S(d1, v1);
                     XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
                     XVLDI(d0, (0b10011 << 8) | 0x4f);
                     XVFCMP_S(q1, d0, v1, cULE); // get Nan,+overflow mark
                     XVBITSEL_V(v0, d1, q0, q1);
+                } else {
+                    XVFTINT_W_S(v0, v1);
                 }
             } else {
-                d1 = fpu_get_scratch(dyn);
-                VFTINT_W_S(d1, v1);
                 if (!BOX64ENV(dynarec_fastround)) {
+                    d1 = fpu_get_scratch(dyn);
                     q0 = fpu_get_scratch(dyn);
                     q1 = fpu_get_scratch(dyn); // mask
                     d0 = fpu_get_scratch(dyn);
+                    VFTINT_W_S(d1, v1);
                     VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
                     VLDI(d0, (0b10011 << 8) | 0x4f);
                     VFCMP_S(q1, d0, v1, cULE); // get Nan,+overflow mark
                     VBITSEL_V(v0, d1, q0, q1);
+                } else {
+                    VFTINT_W_S(v0, v1);
                 }
             }
             x87_restoreround(dyn, ninst, u8);
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 0796e67f..3e9f8353 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -453,7 +453,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGYx_empty(v0);
             u8 = sse_setround(dyn, ninst, x1, x2);
             d0 = fpu_get_scratch(dyn);
-            if(vex.l){
+            if (vex.l) {
                 XVXOR_V(d0, d0, d0);
                 XVFTINT_W_D(v0, d0, v1);       // v0 [lo0, lo1, --, --, hi0, hi1, --, -- ]
                 if (!BOX64ENV(dynarec_fastround)) {
@@ -473,7 +473,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                     XVBITSEL_V(v0, v0, q0, d0);
                 }
                 XVPERMI_D(v0, v0, 0b11011000);
-            }else{
+            } else {
                 VFTINT_W_D(d0, v1, v1);
                 if (!BOX64ENV(dynarec_fastround)) {
                     q0 = fpu_get_scratch(dyn);
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index 1c18f724..b0854d67 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -286,29 +286,33 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             INST_NAME("VCVTTPS2DQ Gx, Ex");
             nextop = F8;
             GETGY_empty_EY_xy(v0, v1, 0);
-            if(vex.l){
-                d1 = fpu_get_scratch(dyn);
-                XVFTINTRZ_W_S(d1, v1);
+            if (vex.l) {
                 if (!BOX64ENV(dynarec_fastround)) {
+                    d1 = fpu_get_scratch(dyn);
                     q0 = fpu_get_scratch(dyn);
                     q1 = fpu_get_scratch(dyn); // mask
                     d0 = fpu_get_scratch(dyn);
+                    XVFTINTRZ_W_S(d1, v1);
                     XVLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
                     XVLDI(d0, (0b10011 << 8) | 0x4f);
                     XVFCMP_S(q1, d0, v1, cULE); // get Nan,+overflow mark
                     XVBITSEL_V(v0, d1, q0, q1);
+                } else {
+                    XVFTINTRZ_W_S(v0, v1);
                 }
             } else {
-                d1 = fpu_get_scratch(dyn);
-                VFTINTRZ_W_S(d1, v1);
                 if (!BOX64ENV(dynarec_fastround)) {
+                    d1 = fpu_get_scratch(dyn);
                     q0 = fpu_get_scratch(dyn);
                     q1 = fpu_get_scratch(dyn); // mask
                     d0 = fpu_get_scratch(dyn);
+                    VFTINTRZ_W_S(d1, v1);
                     VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
                     VLDI(d0, (0b10011 << 8) | 0x4f);
                     VFCMP_S(q1, d0, v1, cULE); // get Nan,+overflow mark
                     VBITSEL_V(v0, d1, q0, q1);
+                } else {
+                    VFTINTRZ_W_S(v0, v1);
                 }
             }
             break;