about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorphorcys <phorcys@126.com>2025-08-04 14:57:32 +0800
committerGitHub <noreply@github.com>2025-08-04 08:57:32 +0200
commit0dc9f8cb62de2877b6f5d480c480b203c5831fa8 (patch)
treeeef978773f7edb94a9b5d90d6b5342fcdc59f4a6 /src
parentcb0b274c2704e5af3c118d30992d63c6f5dff6e8 (diff)
downloadbox64-0dc9f8cb62de2877b6f5d480c480b203c5831fa8.tar.gz
box64-0dc9f8cb62de2877b6f5d480c480b203c5831fa8.zip
[LA64_DYNAREC] Add la64 avx bit ops. (#2873)
*  VEX.66.0F.3A VPEXTRB/VPEXTRW/VPEXTRD/VPEXTRQ VPINSRB/VPINSRD/VPINSRQ
  *  VEX.66.0F.C5 VPEXTRW
  *  VEX.66.0F.C4 VPINSRW
  *  VEX.66.0F.38.41 VPHMINPOSUW
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f.c25
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f38.c21
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f3a.c85
-rw-r--r--src/dynarec/la64/la64_emitter.h2
4 files changed, 133 insertions, 0 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 18379800..30b035a0 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -884,6 +884,31 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 case 0x0f: VSEQxy(B, v0, v1, v1); break;        // true
             }
             break;
+        case 0xC4: // insert a 16-bit value into one halfword of a copy of Vx, result in Gx
+            INST_NAME("VPINSRW Gx, Vx, ED, Ib");
+            nextop = F8;
+            GETEWW(0, x5, 1);                  // NOTE(review): presumably fetches the 16-bit source into `ed` using x5 as scratch — confirm against the macro
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            u8 = F8;                           // immediate byte (Ib); consumed after the operand macros
+            if(v0 != v1) VOR_V(v0, v1, v1);    // v1 | v1 == v1: plain copy of the Vx source into the destination, skipped when they already share a register
+            VINSGR2VR_H(v0, ed, (u8 & 0x7));   // only imm8[2:0] is used: selects which of the 8 halfwords is replaced by ed
+            break;
+        case 0xC5: // extract one halfword of Ex, selected by imm8[2:0], into the GPR Gd
+            INST_NAME("VPEXTRW Gd, Ex, Ib");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                GETEYx(v0, 0, 1);
+                u8 = (F8) & 7;                 // halfword index 0..7
+                VPICKVE2GR_HU(gd, v0, u8);     // move the selected halfword from the vector register to gd
+            } else {
+                SMREAD();                      // this path reads memory
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x4, &fixedaddress, rex, NULL, 0, 1);
+                u8 = (F8) & 7;                 // halfword index 0..7
+                LD_HU(gd, wback, (u8 << 1));   // byte offset = index * 2: load just the selected halfword instead of the whole vector
+            }
+            break;
         case 0xC6:
             INST_NAME("VSHUFPD Gx, Vx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 3017b7ae..1f5ebbc7 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -677,6 +677,27 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMULxy(W, v0, v1, v2);
             break;
+        case 0x41: // unsigned horizontal minimum of the 8 halfwords of Ex, plus the index of its first occurrence
+            INST_NAME("VPHMINPOSUW Gx, Ex");
+            nextop = F8;
+            GETEYx(v1, 0, 0);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            q2 = fpu_get_scratch(dyn);
+                                           // v1 = [a,b,c,d,e,f,g,h] (halfword lanes, lane 0 first)
+            VSHUF4I_W(q0, v1, 0b01001110); // q0 = [e,f,g,h,a,b,c,d]: the two 64-bit halves swapped
+            VMIN_HU(q1, v1, q0);           // q1 = [ae,bf,cg,dh, ...] where xy means min(x,y)
+            // second and third reduction steps: swap neighbours, then swap pairs, taking the unsigned min each time
+            VSHUF4I_H(q2, q1, 0b10110001); // q2 = [bf,ae,dh,cg, ...]: adjacent halfwords swapped
+            VMIN_HU(q1, q1, q2);           // q1 = [aebf,aebf,cgdh,cgdh, ...]
+            VSHUF4I_H(q0, q1, 0b01001110); // q0 = [cgdh,cgdh,aebf,aebf, ...]: halfword pairs swapped
+            VMIN_HU(q2, q0, q1);           // every lane of q2 now holds min(a..h)
+            VSEQ_H(q0, q2, v1);            // q0 lane = 0xffff where the source lane equals the minimum
+            VFRSTPI_H(q2, q0, 1);          // index of the first negative (0xffff) lane of q0 is written into halfword 1 of q2
+            XVPICKVE_W(v0, q2, 0);         // take word 0 of q2 (min in halfword 0, index in halfword 1); NOTE(review): presumably clears the remaining lanes of v0 — confirm
+            YMM_UNMARK_UPPER_ZERO(v0);
+            break;
         case 0x45:
             INST_NAME("VPSRLVD/Q Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index dbd1bca4..9a90ce0f 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -386,6 +386,63 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x14: // extract one byte of Gx, selected by imm8[3:0], to a GPR or to memory
+            INST_NAME("VPEXTRB Ed, Gx, imm8");
+            nextop = F8;
+            GETGYx(q0, 0);
+            if (MODREG) {
+                ed = TO_NAT((nextop & 7) + (rex.b << 3)); // destination GPR: r/m field extended by REX.B
+                u8 = (F8) & 15;                           // byte index 0..15
+                VPICKVE2GR_BU(ed, q0, u8);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x4, &fixedaddress, rex, NULL, 0, 1);
+                u8 = (F8) & 15;                           // byte index 0..15
+                VSTELM_B(q0, wback, 0, u8);               // store only the selected byte at [wback + 0]
+                SMWRITE2();                               // this path writes memory, so flag the store (not a read), as the VPEXTRD/Q memory path does
+            }
+            break;
+        case 0x15: // extract one halfword of Gx, selected by imm8[2:0], to a GPR or to memory
+            INST_NAME("VPEXTRW Ed, Gx, imm8");
+            nextop = F8;
+            GETGYx(q0, 0);
+            if (MODREG) {
+                ed = TO_NAT((nextop & 7) + (rex.b << 3)); // destination GPR: r/m field extended by REX.B
+                u8 = (F8) & 7;                            // halfword index 0..7
+                VPICKVE2GR_HU(ed, q0, u8);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x4, &fixedaddress, rex, NULL, 0, 1);
+                u8 = (F8) & 7;                            // halfword index 0..7
+                VSTELM_H(q0, wback, 0, u8);               // store only the selected halfword at [wback + 0]
+                SMWRITE2();                               // this path writes memory, so flag the store (not a read), as the VPEXTRD/Q memory path does
+            }
+            break;
+        case 0x16: // extract one 32-bit (or, with REX.W, 64-bit) element of Gx to a GPR or to memory
+            if (rex.w) {
+                INST_NAME("VPEXTRQ Ed, Gx, Ib");
+            } else {
+                INST_NAME("VPEXTRD Ed, Gx, Ib");
+            }
+            nextop = F8;
+            GETGYx(q0, 0);
+            if (MODREG) {
+                ed = TO_NAT((nextop & 7) + (rex.b << 3)); // destination GPR: r/m field extended by REX.B
+                u8 = F8;                                  // immediate byte; masked below according to element size
+                if (rex.w) {
+                    VPICKVE2GR_D(ed, q0, (u8 & 1));       // 2 doublewords: index is imm8[0]
+                } else {
+                    VPICKVE2GR_WU(ed, q0, (u8 & 3));      // 4 words: index is imm8[1:0]
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 0, 1); // here `ed` receives the address register, not a value
+                u8 = F8;                                  // immediate byte; masked below according to element size
+                if (rex.w) {
+                    VSTELM_D(q0, ed, 0, (u8 & 1));        // store only the selected doubleword at [ed + 0]
+                } else {
+                    VSTELM_W(q0, ed, 0, (u8 & 3));        // store only the selected word at [ed + 0]
+                }
+                SMWRITE2();                               // memory was written by the store above
+            }
+            break;
         case 0x17:
             INST_NAME("VEXTRACTPS Ed, Gx, imm8");
             nextop = F8;
@@ -471,6 +528,16 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             }
             x87_restoreround(dyn, ninst, u8);
             break;
+        case 0x20: // insert a byte into one lane of a copy of Vx, result in Gx
+            INST_NAME("VPINSRB Gx, Vx, ED, Ib");
+            nextop = F8;
+            GETEB(x5, 1);                      // NOTE(review): presumably fetches the byte source into `ed` using x5 as scratch — confirm against the macro
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            u8 = F8;                           // immediate byte (Ib); consumed after the operand macros
+            if(v0 != v1) VOR_V(v0, v1, v1);    // v1 | v1 == v1: plain copy of the Vx source into the destination, skipped when they already share a register
+            VINSGR2VR_B(v0, ed, (u8 & 0xf));   // only imm8[3:0] is used: selects which of the 16 bytes is replaced by ed
+            break;
         case 0x21:
             INST_NAME("VINSERTPS Gx, Vx, Ex, Ib");
             nextop = F8;
@@ -506,6 +573,24 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x22: // insert a 32-bit (or, with REX.W, 64-bit) value into one lane of a copy of Vx, result in Gx
+            if (rex.w) {
+                INST_NAME("VPINSRQ Gx, Vx, ED, Ib");
+            } else {
+                INST_NAME("VPINSRD Gx, Vx, ED, Ib");
+            }            
+            nextop = F8;
+            GETED(1);                          // NOTE(review): presumably makes the Ed source available in `ed`, with 1 trailing immediate byte — confirm against the macro
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            u8 = F8;                           // immediate byte (Ib); masked below according to element size
+            if(v0 != v1) VOR_V(v0, v1, v1);    // v1 | v1 == v1: plain copy of the Vx source into the destination, skipped when they already share a register
+            if(rex.w) {
+                VINSGR2VR_D(v0, ed, (u8 & 0x1)); // 2 doublewords: lane is imm8[0]
+            } else {
+                VINSGR2VR_W(v0, ed, (u8 & 0x3)); // 4 words: lane is imm8[1:0]
+            }
+            break;
         case 0x2A:
             INST_NAME("VMOVNTDQA Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 196126a7..85ad1fd7 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1250,6 +1250,8 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VBITREV_D(vd, vj, vk)        EMIT(type_3R(0b01110001000100011, vk, vj, vd))
 #define VFRSTP_B(vd, vj, vk)         EMIT(type_3R(0b01110001001010110, vk, vj, vd))
 #define VFRSTP_H(vd, vj, vk)         EMIT(type_3R(0b01110001001010111, vk, vj, vd))
+#define VFRSTPI_B(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100110100, imm5, vj, vd)) // byte form; takes a 5-bit immediate where VFRSTP_B takes a third register
+#define VFRSTPI_H(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100110101, imm5, vj, vd)) // halfword form; takes a 5-bit immediate where VFRSTP_H takes a third register
 #define VFADD_S(vd, vj, vk)          EMIT(type_3R(0b01110001001100001, vk, vj, vd))
 #define VFADD_D(vd, vj, vk)          EMIT(type_3R(0b01110001001100010, vk, vj, vd))
 #define VFSUB_S(vd, vj, vk)          EMIT(type_3R(0b01110001001100101, vk, vj, vd))