about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2025-07-14 10:09:47 +0200
committerptitSeb <sebastien.chev@gmail.com>2025-07-14 10:09:47 +0200
commit179cba7a28e5fede829da7b8c7c78d5a37a05882 (patch)
treea826154a60c0ecbb65fba6da05a372c25b0aa16d /src
parent0a59321106f13817fa52614a401e18abe11e3f5b (diff)
parentce08e8e27f7fda2ff2c02af215f3b8e16d3f0576 (diff)
downloadbox64-179cba7a28e5fede829da7b8c7c78d5a37a05882.tar.gz
box64-179cba7a28e5fede829da7b8c7c78d5a37a05882.zip
Merge remote-tracking branch 'refs/remotes/origin/main'
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/la64/dynarec_la64_660f.c96
-rw-r--r--src/dynarec/la64/dynarec_la64_avx.c8
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f.c90
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f38.c53
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_f2_0f38.c73
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_f3_0f38.c73
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.h4
-rw-r--r--src/dynarec/la64/la64_emitter.h74
8 files changed, 407 insertions, 64 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 4eb11299..e4fca3c4 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -2205,7 +2205,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             REVBxw(gd, gd);
             break;
         case 0xD0:
-            INST_NAME("ADDSUBPD Gx,Ex");
+            INST_NAME("ADDSUBPD Gx, Ex");
             nextop = F8;
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
@@ -2215,7 +2215,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             VEXTRINS_D(q0, v0, 0);
             break;
         case 0xD1:
-            INST_NAME("PSRLW Gx,Ex");
+            INST_NAME("PSRLW Gx, Ex");
             nextop = F8;
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
@@ -2234,16 +2234,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
             v1 = fpu_get_scratch(dyn);
-            VSAT_DU(v0, q1, 31);
-            VREPLVEI_W(v0, v0, 0);
-            VLDI(v1, 0b1000000011111); // broadcast 31 as uint32
-            VSLT_WU(v1, v1, v0);
-            VMINI_WU(v0, v0, 31);
-            VSRL_W(q0, q0, v0);
+            VREPLVEI_D(v0, q1, 0);
+            VSLEI_DU(v0, v0, 31);
+            VREPLVEI_W(v1, q1, 0);
             VSRL_W(q0, q0, v1);
+            VAND_V(q0, q0, v0);
             break;
         case 0xD3:
-            INST_NAME("PSRLQ Gx,Ex");
+            INST_NAME("PSRLQ Gx, Ex");
             nextop = F8;
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
@@ -2252,10 +2250,9 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             d0 = fpu_get_scratch(dyn);
             VREPLVEI_D(v0, q1, 0);
             VLDI(v1, 0b0110000111111); // broadcast 63 as uint64
-            VMIN_DU(d0, v0, v1);
-            VSLT_DU(v1, v1, v0);
-            VSRL_D(q0, q0, d0);
-            VSRL_D(q0, q0, v1);
+            VSLE_DU(v1, v0, v1);
+            VSRL_D(q0, q0, v0);
+            VAND_V(q0, q0, v1);
             break;
         case 0xD4:
             INST_NAME("PADDQ Gx, Ex");
@@ -2265,7 +2262,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             VADD_D(v0, v0, q0);
             break;
         case 0xD5:
-            INST_NAME("PMULLW Gx,Ex");
+            INST_NAME("PMULLW Gx, Ex");
             nextop = F8;
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
@@ -2377,36 +2374,31 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             VSRA_H(q0, q0, v0);
             break;
         case 0xE2:
-            INST_NAME("PSRAD Gx,Ex");
+            INST_NAME("PSRAD Gx, Ex");
             nextop = F8;
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
-            v1 = fpu_get_scratch(dyn);
-            v2 = fpu_get_scratch(dyn);
-            VREPLVEI_D(v0, q1, 0);
-            VSLEI_DU(v1, v0, 31);
-            VREPLVEI_W(v0, q1, 0);
-            VSRAI_W(v2, q0, 31);
+            VMINI_DU(v0, q1, 31);
+            VREPLVEI_W(v0, v0, 0);
             VSRA_W(q0, q0, v0);
-            VBITSEL_V(q0, v2, q0, v1);
             break;
         case 0xE3:
-            INST_NAME("PAVGW Gx,Ex");
+            INST_NAME("PAVGW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
             VAVGR_HU(v0, v0, v1);
             break;
         case 0xE4:
-            INST_NAME("PMULHUW Gx,Ex");
+            INST_NAME("PMULHUW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
             VMUH_HU(v0, v0, v1);
             break;
         case 0xE5:
-            INST_NAME("PMULHW Gx,Ex");
+            INST_NAME("PMULHW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
@@ -2434,56 +2426,56 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             break;
         case 0xE8:
-            INST_NAME("PSUBSB Gx,Ex");
+            INST_NAME("PSUBSB Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VSSUB_B(v0, v0, q0);
             break;
         case 0xE9:
-            INST_NAME("PSUBSW Gx,Ex");
+            INST_NAME("PSUBSW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VSSUB_H(v0, v0, q0);
             break;
         case 0xEA:
-            INST_NAME("PMINSW Gx,Ex");
+            INST_NAME("PMINSW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VMIN_H(v0, v0, q0);
             break;
         case 0xEB:
-            INST_NAME("POR Gx,Ex");
+            INST_NAME("POR Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VOR_V(v0, v0, q0);
             break;
         case 0xEC:
-            INST_NAME("PADDSB Gx,Ex");
+            INST_NAME("PADDSB Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VSADD_B(v0, v0, q0);
             break;
         case 0xED:
-            INST_NAME("PADDSW Gx,Ex");
+            INST_NAME("PADDSW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VSADD_H(v0, v0, q0);
             break;
         case 0xEE:
-            INST_NAME("PMAXSW Gx,Ex");
+            INST_NAME("PMAXSW Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
             GETEX(q0, 0, 0);
             VMAX_H(v0, v0, q0);
             break;
         case 0xEF:
-            INST_NAME("PXOR Gx,Ex");
+            INST_NAME("PXOR Gx, Ex");
             nextop = F8;
             GETG;
             if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) {
@@ -2503,15 +2495,11 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
             v1 = fpu_get_scratch(dyn);
-            VREPLVE_H(v1, q1, xZR);
-            VPICKVE2GR_DU(x4, q1, 0);
-            SLTUI(x3, x4, 16);
-            SUB_D(x3, xZR, x3);
-            NOR(x3, x3, xZR);
-            VREPLGR2VR_D(v0, x3);
+            VREPLVEI_D(v0, q1, 0);
+            VSLEI_DU(v0, v0, 15);
+            VREPLVEI_H(v1, q1, 0);
             VSLL_H(q0, q0, v1);
-            VAND_V(v0, q0, v0);
-            VXOR_V(q0, q0, v0);
+            VAND_V(q0, q0, v0);
             break;
         case 0xF2:
             INST_NAME("PSLLD Gx, Ex");
@@ -2520,15 +2508,11 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
             v1 = fpu_get_scratch(dyn);
-            VREPLVE_W(v1, q1, xZR);
-            VPICKVE2GR_DU(x4, q1, 0);
-            SLTUI(x3, x4, 32);
-            SUB_D(x3, xZR, x3);
-            NOR(x3, x3, xZR);
-            VREPLGR2VR_D(v0, x3);
+            VREPLVEI_D(v0, q1, 0);
+            VSLEI_DU(v0, v0, 31);
+            VREPLVEI_W(v1, q1, 0);
             VSLL_W(q0, q0, v1);
-            VAND_V(v0, q0, v0);
-            VXOR_V(q0, q0, v0);
+            VAND_V(q0, q0, v0);
             break;
         case 0xF3:
             INST_NAME("PSLLQ Gx, Ex");
@@ -2537,15 +2521,11 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
             v1 = fpu_get_scratch(dyn);
-            VREPLVE_D(v1, q1, xZR);
-            VPICKVE2GR_DU(x4, q1, 0);
-            SLTUI(x3, x4, 64);
-            SUB_D(x3, xZR, x3);
-            NOR(x3, x3, xZR);
-            VREPLGR2VR_D(v0, x3);
-            VSLL_D(q0, q0, v1);
-            VAND_V(v0, q0, v0);
-            VXOR_V(q0, q0, v0);
+            VREPLVEI_D(v0, q1, 0);
+            VLDI(v1, (0b011 << 10) | 0x3f);
+            VSLEI_DU(v1, v0, v1);
+            VSLL_D(q0, q0, v0);
+            VAND_V(q0, q0, v1);
             break;
         case 0xF4:
             INST_NAME("PMULUDQ Gx,Ex");
diff --git a/src/dynarec/la64/dynarec_la64_avx.c b/src/dynarec/la64/dynarec_la64_avx.c
index 12e00db2..f395c80e 100644
--- a/src/dynarec/la64/dynarec_la64_avx.c
+++ b/src/dynarec/la64/dynarec_la64_avx.c
@@ -63,12 +63,12 @@ uintptr_t dynarec64_AVX(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int n
         addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if ((vex.m == VEX_M_0F3A) && (vex.p == VEX_P_66))
         addr = dynarec64_AVX_66_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
-    // else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_F2))
-    //     addr = dynarec64_AVX_F2_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if ((vex.m == VEX_M_0F38) && (vex.p == VEX_P_F2))
+        addr = dynarec64_AVX_F2_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if ((vex.m == VEX_M_0F3A) && (vex.p == VEX_P_F2))
         addr = dynarec64_AVX_F2_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
-    // else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_F3))
-    //     addr = dynarec64_AVX_F3_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if ((vex.m == VEX_M_0F38) && (vex.p == VEX_P_F3))
+        addr = dynarec64_AVX_F3_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else {
         DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 0c707a28..26b6f684 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -457,6 +457,42 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0xD1:
+            INST_NAME("VPSRLW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VREPLVE0xy(H, d0, v2);
+            VSLEIxy(DU, q0, q0, 15);
+            VSRLxy(H, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xD2:
+            INST_NAME("VPSRLD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VREPLVE0xy(W, d0, v2);
+            VSLEIxy(DU, q0, q0, 31);
+            VSRLxy(W, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xD3:
+            INST_NAME("VPSRLQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VLDIxy(d0, (0b011 << 10) | 0x3f);
+            VSLExy(DU, d0, q0, d0);
+            VSRLxy(D, v0, v1, q0);
+            VAND_Vxy(v0, v0, d0);
+            break;
         case 0xD6:
             INST_NAME("VMOVD Ex, Gx");
             nextop = F8;
@@ -499,6 +535,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VANDN_Vxy(v0, v1, v2);
             break;
+        case 0xE1:
+            INST_NAME("VPSRAW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(DU, d0, v2, 15);
+            VREPLVE0xy(H, d0, d0);
+            VSRAxy(H, v0, v1, d0);
+            break;
+        case 0xE2:
+            INST_NAME("VPSRAD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(DU, d0, v2, 31);
+            VREPLVE0xy(W, d0, d0);
+            VSRAxy(W, v0, v1, d0);
+            break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex, Gx");
             nextop = F8;
@@ -527,6 +581,42 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VXOR_Vxy(v0, v1, v2);
             break;
+        case 0xF1:
+            INST_NAME("VPSLLW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VSLEIxy(DU, q0, q0, 15);
+            VREPLVE0xy(H, d0, v2);
+            VSLLxy(H, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xF2:
+            INST_NAME("VPSLLD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VSLEIxy(DU, q0, q0, 31);
+            VREPLVE0xy(W, d0, v2);
+            VSLLxy(W, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xF3:
+            INST_NAME("VPSLLQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VLDIxy(d0, (0b011 << 10) | 0x3f);
+            VSLExy(DU, d0, q0, d0);
+            VSLLxy(D, v0, v1, q0);
+            VAND_Vxy(v0, v0, d0);
+            break;
         case 0xF7:
             INST_NAME("VMASKMOVDQU Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 48d4e4e1..1e561b67 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -27,7 +27,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
 
     uint8_t opcode = F8;
     uint8_t nextop, u8;
-    uint8_t gd, ed;
+    uint8_t gd, ed, vd;
     uint8_t wback, wb1, wb2;
     uint8_t eb1, eb2, gb1, gb2;
     int32_t i32, i32_;
@@ -307,6 +307,48 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 VSLLWIL_DU_WU(q0, q1, 0);
             }
             break;
+        case 0x45:
+            INST_NAME("VPSRLVD/Q Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                d0 = fpu_get_scratch(dyn);
+                VLDIxy(d0, (0b011 << 10) | 63);
+                VSLExy(DU, d1, v2, d0);
+                VSRLxy(D, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            } else {
+                VSLEIxy(WU, d1, v2, 31);
+                VSRLxy(W, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            }
+            break;
+        case 0x46:
+            INST_NAME("VPSRAVD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(WU, d0, v2, 31);
+            VSRAxy(W, v0, v1, d0);
+            break;
+        case 0x47:
+            INST_NAME("VPSLLVD/Q Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                d0 = fpu_get_scratch(dyn);
+                VLDIxy(d0, (0b011 << 10) | 63);
+                VSLExy(DU, d1, v2, d0);
+                VSLLxy(D, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            } else {
+                VSLEIxy(WU, d1, v2, 31);
+                VSLLxy(W, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            }
+            break;
         case 0x8C:
             INST_NAME("VPMASKMOVD/Q Gx, Vx, Ex");
             nextop = F8;
@@ -354,6 +396,15 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 PUTEYx(v0);
             }
             break;
+        case 0xF7:
+            INST_NAME("SHLX Gd, Ed, Vd");
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            GETVD;
+            ANDI(x5, vd, rex.w ? 0x3f : 0x1f);
+            SLLxw(gd, ed, x5);
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c
new file mode 100644
index 00000000..f8a29542
--- /dev/null
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "env.h"
+#include "box64context.h"
+#include "box64cpu.h"
+#include "emu/x64emu_private.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+
+#include "la64_printer.h"
+#include "dynarec_la64_private.h"
+#include "dynarec_la64_functions.h"
+#include "../dynarec_helper.h"
+
+uintptr_t dynarec64_AVX_F2_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip;
+    (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch (opcode) {
+        case 0xF7:
+            INST_NAME("SHRX Gd, Ed, Vd");
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            GETVD;
+            ANDI(x5, vd, rex.w ? 0x3f : 0x1f);
+            SRLxw(gd, ed, x5);
+            break;
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c
new file mode 100644
index 00000000..8e8e6781
--- /dev/null
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "env.h"
+#include "box64context.h"
+#include "box64cpu.h"
+#include "emu/x64emu_private.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+
+#include "la64_printer.h"
+#include "dynarec_la64_private.h"
+#include "dynarec_la64_functions.h"
+#include "../dynarec_helper.h"
+
+uintptr_t dynarec64_AVX_F3_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip;
+    (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch (opcode) {
+        case 0xF7:
+            INST_NAME("SARX Gd, Ed, Vd");
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            GETVD;
+            ANDI(x5, vd, rex.w ? 0x3f : 0x1f);
+            SRAxw(gd, ed, x5);
+            break;
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index b6b646a8..acd22031 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -1100,8 +1100,10 @@
 #define dynarec64_AVX_66_0F38 STEPNAME(dynarec64_AVX_66_0F38)
 #define dynarec64_AVX_66_0F3A STEPNAME(dynarec64_AVX_66_0F3A)
 #define dynarec64_AVX_F2_0F   STEPNAME(dynarec64_AVX_F2_0F)
+#define dynarec64_AVX_F2_0F38 STEPNAME(dynarec64_AVX_F2_0F38)
 #define dynarec64_AVX_F2_0F3A STEPNAME(dynarec64_AVX_F2_0F3A)
 #define dynarec64_AVX_F3_0F   STEPNAME(dynarec64_AVX_F3_0F)
+#define dynarec64_AVX_F3_0F38 STEPNAME(dynarec64_AVX_F3_0F38)
 
 #define geted               STEPNAME(geted)
 #define geted32             STEPNAME(geted32)
@@ -1392,8 +1394,10 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
 uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_F2_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F2_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_F3_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 
 
 #if STEP < 3
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 0472481b..692b8fc8 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1530,7 +1530,6 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VFNMADD_D(vd, vj, vk, va)    EMIT(type_4R(0b000010011010, va, vk, vj, vd))
 #define VFNMSUB_D(vd, vj, vk, va)    EMIT(type_4R(0b000010011110, va, vk, vj, vd))
 
-
 #define XVADD_B(vd, vj, vk)          EMIT(type_3R(0b01110100000010100, vk, vj, vd))
 #define XVADD_H(vd, vj, vk)          EMIT(type_3R(0b01110100000010101, vk, vj, vd))
 #define XVADD_W(vd, vj, vk)          EMIT(type_3R(0b01110100000010110, vk, vj, vd))
@@ -2239,6 +2238,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVMINI_DU(xd, xj, imm5)      EMIT(type_2RI5(0b01110110100101111, imm5, xj, xd))
 #define XVFRSTPI_B(xd, xj, imm5)     EMIT(type_2RI5(0b01110110100110100, imm5, xj, xd))
 #define XVFRSTPI_H(xd, xj, imm5)     EMIT(type_2RI5(0b01110110100110101, imm5, xj, xd))
+#define XVLDI(xd, imm13)             EMIT(type_1RI13(0b01110111111000, imm13, xd))
 
 #define XVFMADD_S(xd, xj, xk, xa)  EMIT(type_4R(0b000010100001, xa, xk, xj, xd))
 #define XVFMSUB_S(xd, xj, xk, xa)  EMIT(type_4R(0b000010100101, xa, xk, xj, xd))
@@ -2702,4 +2702,76 @@ LSX instruction starts with V, LASX instruction starts with XV.
             VSRAI_##width(vd, vj, imm);  \
         }                                \
     } while (0)
+
+#define VSLLxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLL_##width(vd, vj, vk); \
+        } else {                       \
+            VSLL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSRLxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSRL_##width(vd, vj, vk); \
+        } else {                       \
+            VSRL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSRAxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSRA_##width(vd, vj, vk); \
+        } else {                       \
+            VSRA_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSLEIxy(width, vd, vj, imm)      \
+    do {                                 \
+        if (vex.l) {                     \
+            XVSLEI_##width(vd, vj, imm); \
+        } else {                         \
+            VSLEI_##width(vd, vj, imm);  \
+        }                                \
+    } while (0)
+
+#define VSLExy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLE_##width(vd, vj, vk); \
+        } else {                       \
+            VSLE_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VLDIxy(vd, imm)     \
+    do {                    \
+        if (vex.l) {        \
+            XVLDI(vd, imm); \
+        } else {            \
+            VLDI(vd, imm);  \
+        }                   \
+    } while (0)
+
+#define VREPLVE0xy(width, vd, vj)        \
+    do {                                 \
+        if (vex.l) {                     \
+            XVREPLVE0_##width(vd, vj);   \
+        } else {                         \
+            VREPLVEI_##width(vd, vj, 0); \
+        }                                \
+    } while (0)
+
+#define VMINIxy(width, vd, vj, imm)      \
+    do {                                 \
+        if (vex.l) {                     \
+            XVMINI_##width(vd, vj, imm); \
+        } else {                         \
+            VMINI_##width(vd, vj, imm);  \
+        }                                \
+    } while (0)
 #endif //__ARM64_EMITTER_H__