about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: phorcys <phorcys@126.com> 2025-06-25 00:38:12 +0800
committer: GitHub <noreply@github.com> 2025-06-24 18:38:12 +0200
commit: 7962aacc7fbe1746da2bd1ed14d84856d5c5dd6f (patch)
tree: 98cfff449112f4e3be94e0bb303e184392f74f96 /src
parent: 9195bc2d4a02770fe13d421c4c9c4eb438dec6e3 (diff)
download: box64-7962aacc7fbe1746da2bd1ed14d84856d5c5dd6f.tar.gz
          box64-7962aacc7fbe1746da2bd1ed14d84856d5c5dd6f.zip
[LA64_DYNAREC] Add la64 avx load/store ops part 1. (#2766)
  *  VEX.0f       VMOVNTPS/VLDMXCSR/VSTMXCSR
  *  VEX.66.0f    VMOVNTPD/VMOVD/VMOVNTDQ
  *  VEX.66.0f.3a VMOVNTDQA
  *  VEX.f2.0f    VMOVDDUP/VLDDQU
  *  VEX.f3.0f    VMOVSLDUP/VMOVSHDUP/VMOVD
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_0f.c      52
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f.c   48
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f3a.c 22
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f2_0f.c   46
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f3_0f.c   38
5 files changed, 194 insertions, 12 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c
index ec260232..9c693fc3 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f.c
@@ -141,6 +141,22 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 SMWRITE2();
             }
             break;
+        case 0x2B:
+            INST_NAME("VMOVNTPS Ex, Gx");
+            nextop = F8;
+            GETGYxy(q0, 0);
+            if (MODREG) {
+                DEFAULT;
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0);
+                if (vex.l) {
+                    XVST(q0, ed, fixedaddress);
+                } else {
+                    VST(q0, ed, fixedaddress);
+                }
+                SMWRITE2();
+            }
+            break;
         case 0x77:
             if (!vex.l) {
                 INST_NAME("VZEROUPPER");
@@ -150,9 +166,9 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                     q2 = fpu_get_scratch(dyn);
                     XVXOR_V(q2, q2, q2);
                     for (int i = 0; i < (rex.is32bits ? 8 : 16); ++i) {
-                        if (dyn->lsx.avxcache[i].v != -1 ){
+                        if (dyn->lsx.avxcache[i].v != -1) {
                             // avx used register
-                            if(dyn->lsx.avxcache[i].width == LSX_AVX_WIDTH_256) {
+                            if (dyn->lsx.avxcache[i].width == LSX_AVX_WIDTH_256) {
                                 // 256 width, fill upper 128bits with zero.
                                 q1 = avx_get_reg(dyn, ninst, x1, i, 1, LSX_AVX_WIDTH_256); // mark reg write (dirty)
                                 XVPERMI_Q(q1, q2, XVPERMI_IMM_4_0(0, 2));
@@ -161,7 +177,7 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                                 q1 = avx_get_reg(dyn, ninst, x1, i, 1, LSX_AVX_WIDTH_128); // mark reg write (dirty)
                                 dyn->lsx.avxcache[i].zero_upper = 1;
                             }
-                        }else {
+                        } else {
                             // SSE register or unused register, store 128bit zero to x64emu_t.ymm[]
                             VST(q2, xEmu, offsetof(x64emu_t, ymm[i]));
                         }
@@ -180,7 +196,35 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 }
             }
             break;
-
+        case 0xAE:
+            nextop = F8;
+            if (MODREG) {
+                DEFAULT;
+            } else
+                switch ((nextop >> 3) & 7) {
+                    case 2:
+                        INST_NAME("VLDMXCSR Md");
+                        GETED(0);
+                        ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr));
+                        if (BOX64ENV(sse_flushto0)) {
+                            // sync with fpsr, with mask from mxcsr
+                            // TODO
+                        }
+                        break;
+                    case 3:
+                        INST_NAME("VSTMXCSR Md");
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                        LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr));
+                        ST_W(x4, wback, fixedaddress);
+                        if (BOX64ENV(sse_flushto0)) {
+                            // sync with fpsr, with mask from mxcsr
+                            // TODO
+                        }
+                        break;
+                    default:
+                        DEFAULT;
+                }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index f8c4963c..95004d73 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -141,6 +141,22 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x2B:
+            INST_NAME("VMOVNTPD Ex, Gx");
+            nextop = F8;
+            GETGYxy(q0, 0);
+            if (MODREG) {
+                DEFAULT;
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0);
+                if (vex.l) {
+                    XVST(q0, ed, fixedaddress);
+                } else {
+                    VST(q0, ed, fixedaddress);
+                }
+                SMWRITE2();
+            }
+            break;
         case 0x6E:
             INST_NAME("VMOVD Gx, Ed");
             nextop = F8;
@@ -217,7 +233,37 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
-
+        case 0xD6:
+            INST_NAME("VMOVD Ex, Gx");
+            nextop = F8;
+            GETGYx(q0, 0);
+            if (MODREG) {
+                GETEYx_empty(q1, 0);
+                XVXOR_V(q1, q1, q1);
+                XVINSVE0_D(q1, q0, 0);
+                YMM_UNMARK_UPPER_ZERO(q1);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0);
+                FST_D(q0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
+        case 0xE7:
+            INST_NAME("VMOVNTDQ Ex, Gx");
+            nextop = F8;
+            GETGYxy(q0, 0);
+            if (MODREG) {
+                DEFAULT;
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0);
+                if (vex.l) {
+                    XVST(q0, ed, fixedaddress);
+                } else {
+                    VST(q0, ed, fixedaddress);
+                }
+                SMWRITE2();
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index 1310cac2..74cf1759 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -68,7 +68,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY_empty_VYEY_xy(q0, q1, q2, 1);
             u8 = F8;
             XVOR_V(q0, q1, q1);
-            XVPERMI_Q(q0, q2, (u8&1) == 0 ? 0b00110000 : 0b00000010);
+            XVPERMI_Q(q0, q2, (u8 & 1) == 0 ? 0b00110000 : 0b00000010);
             break;
         case 0x19:
         case 0x39:
@@ -81,9 +81,9 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETEY_GY_xy(q1, q0, 1);
             u8 = F8;
             if (MODREG) {
-                XVPERMI_Q(q1, q0, (u8&1) == 0 ? XVPERMI_IMM_4_0(3, 0) : XVPERMI_IMM_4_0(3, 1));
+                XVPERMI_Q(q1, q0, (u8 & 1) == 0 ? XVPERMI_IMM_4_0(3, 0) : XVPERMI_IMM_4_0(3, 1));
             } else {
-                if ((u8&1) == 1) {
+                if ((u8 & 1) == 1) {
                     XVPERMI_Q(q1, q0, XVPERMI_IMM_4_0(3, 1));
                     VST(q1, ed, fixedaddress);
                 } else {
@@ -91,6 +91,22 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x2A:
+            INST_NAME("VMOVNTDQA Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                DEFAULT;
+            } else {
+                GETGYxy_empty(q0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
+                if (vex.l) {
+                    XVLD(q0, ed, fixedaddress);
+                } else {
+                    VLD(q0, ed, fixedaddress);
+                }
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 6d6f8209..f97e9890 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -69,7 +69,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VEXTRINS_D(q0, q2, 0);
             } else {
                 GETEYSD(q2, 0, 0);
-                GETGYx_empty(q0);                
+                GETGYx_empty(q0);
                 XVXOR_V(q0, q0, q0);
                 XVINSVE0_D(q0, q2, 0);
                 YMM_UNMARK_UPPER_ZERO(q0);
@@ -95,7 +95,49 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
-
+        case 0x12:
+            INST_NAME("VMOVDDUP Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                GETGY_empty_EY_xy(q0, q1, 0);
+            } else {
+                GETGYxy_empty(q0);
+                q1 = fpu_get_scratch(dyn);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 0, 0);
+                if (vex.l) {
+                    XVLD(q1, ed, 0);
+                } else {
+                    VLDREPL_D(q0, ed, 0);
+                }
+            }
+            if (vex.l) {
+                XVSHUF4I_D(q0, q1, 0b1010);
+            } else if (MODREG) {
+                VREPLVE_D(q0, q1, 0);
+            }
+            break;
+        case 0xF0:
+            INST_NAME("VLDDQU Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                GETGY_empty_EY_xy(q0, q1, 0);
+                if (vex.l) {
+                    XVOR_V(q0, q1, q1);
+                } else {
+                    VOR_V(q0, q1, q1);
+                }
+            } else {
+                GETGYxy_empty(q0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                if (vex.l) {
+                    XVLD(q0, ed, fixedaddress);
+                } else {
+                    VLD(q0, ed, fixedaddress);
+                }
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index d9ad1413..f6f4535d 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -50,8 +50,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             if (MODREG) {
                 GETVYx(q1, 0);
                 GETEYSS(q2, 0, 0);
-                GETGYx_empty(q0);                
-                if ( !gd == vex.v) VOR_V(q0, q1, q1);
+                GETGYx_empty(q0);
+                if (!gd == vex.v) VOR_V(q0, q1, q1);
                 VEXTRINS_W(q0, q2, 0);
             } else {
                 GETEYSS(q2, 0, 0);
@@ -81,6 +81,26 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x12:
+            INST_NAME("VMOVSLDUP Gx, Ex");
+            nextop = F8;
+            GETGY_empty_EY_xy(q0, q1, 0);
+            if (vex.l) {
+                XVPACKEV_W(q0, q1, q1);
+            } else {
+                VPACKEV_W(q0, q1, q1);
+            }
+            break;
+        case 0x16:
+            INST_NAME("VMOVSHDUP Gx, Ex");
+            nextop = F8;
+            GETGY_empty_EY_xy(q0, q1, 0);
+            if (vex.l) {
+                XVPACKOD_W(q0, q1, q1);
+            } else {
+                VPACKOD_W(q0, q1, q1);
+            }
+            break;
         case 0x6F:
             INST_NAME("VMOVDQU Gx, Ex");
             nextop = F8;
@@ -102,6 +122,20 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 }
             }
             break;
+        case 0x7E:
+            INST_NAME("VMOVD Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                GETEYx(q1, 0, 0);
+                GETGYx_empty(q0);
+            } else {
+                GETEYSD(q1, 0, 0);
+                GETGYx_empty(q0);
+            }
+            XVXOR_V(q0, q0, q0);
+            XVINSVE0_D(q0, q1, 0);
+            YMM_UNMARK_UPPER_ZERO(q0);
+            break;
         case 0x7F:
             INST_NAME("VMOVDQU Ex, Gx");
             nextop = F8;