about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author phorcys <phorcys@126.com> 2025-07-23 17:01:12 +0800
committer GitHub <noreply@github.com> 2025-07-23 11:01:12 +0200
commit e1c1303d285287a9d27af0ea9a82c0673a8e744b (patch)
tree fe1bcf39dd29579fe60cae3453489ab09c498ccc /src
parent 4a8a3736622a559dee709fe4769cc64704f5b69e (diff)
download box64-e1c1303d285287a9d27af0ea9a82c0673a8e744b.tar.gz
box64-e1c1303d285287a9d27af0ea9a82c0673a8e744b.zip
[LA64_DYNAREC] Add la64 avx float ops VDPP{S,D}, VH{ADD,SUB}{PS,PD} (#2842)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f.c 42
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f3a.c 50
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f2_0f.c 42
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f3_0f.c 8
-rw-r--r-- src/dynarec/la64/la64_emitter.h 21
-rw-r--r-- src/dynarec/la64/la64_printer.c 32
6 files changed, 191 insertions, 4 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index d7e2ecf5..4217a713 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -603,6 +603,48 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                     DEFAULT;
             }
             break;
+        case 0x7C: // horizontal add of packed doubles: split the operands into even/odd elements, then add them
+            INST_NAME("VHADDPD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0); // presumably v0 = Gx (dest), v1 = Vx, v2 = Ex -- confirm against the macro
+            q0 = fpu_get_scratch(dyn);
+            VPICKEVxy(D, q0, v2, v1); // q0 <- even-indexed doubles gathered from v2 and v1
+            VPICKODxy(D, v0, v2, v1); // v0 <- odd-indexed doubles; q0[i] and v0[i] now form one horizontal pair
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMPxy(D, d0, q0, v0, cUN); // d0 = mask of pairs where an input is already NaN (unordered compare)
+            }
+            VFADDxy(D, v0, q0, v0);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                VFCMPxy(D, d1, v0, v0, cUN); // d1 = mask of elements whose result is NaN
+                VANDN_Vxy(d0, d0, d1); // d0 = ~d0 & d1: NaN created by the add itself, not propagated from an input
+                VLDIxy(d1, (0b011 << 9) | 0b111111000);
+                VSLLIxy(D, d1, d1, 48); // broadcast 0xfff8000000000000
+                VBITSEL_Vxy(v0, v0, d1, d0); // where d0 is set, replace the result with the constant built in d1
+            }
+            break;
+        case 0x7D: // horizontal subtract of packed doubles: even element minus odd element of each pair
+            INST_NAME("VHSUBPD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0); // presumably v0 = Gx (dest), v1 = Vx, v2 = Ex -- confirm against the macro
+            q0 = fpu_get_scratch(dyn);
+            VPICKEVxy(D, q0, v2, v1); // q0 <- even-indexed doubles gathered from v2 and v1 (the minuends)
+            VPICKODxy(D, v0, v2, v1); // v0 <- odd-indexed doubles (the subtrahends)
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMPxy(D, d0, q0, v0, cUN); // d0 = mask of pairs where an input is already NaN (unordered compare)
+            }
+            VFSUBxy(D, v0, q0, v0);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                VFCMPxy(D, d1, v0, v0, cUN); // d1 = mask of elements whose result is NaN
+                VANDN_Vxy(d0, d0, d1); // d0 = ~d0 & d1: NaN created by the subtract itself, not propagated from an input
+                VLDIxy(d1, (0b011 << 9) | 0b111111000);
+                VSLLIxy(D, d1, d1, 48); // broadcast 0xfff8000000000000
+                VBITSEL_Vxy(v0, v0, d1, d0); // where d0 is set, replace the result with the constant built in d1
+            }
+            break;
         case 0x7E:
             INST_NAME("VMOVD Ed, Gx");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index fedc6ec8..beba561f 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -404,6 +404,56 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x40: // dot product of packed singles, with product and result selection taken from the immediate
+            INST_NAME("VDPPS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 1);
+            u8 = F8; // immediate: bits 4-7 are tested below to select products, bits 0-3 to select result elements
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            d2 = fpu_get_scratch(dyn);
+            VFMULxy(S, d0, v1, v2);
+            VXOR_Vxy(d2, d2, d2); // d2 = all zeroes, the source for every masked-out element
+            for (int i = 0; i < 4; ++i) {
+                if (!(u8 & (1 << (4 + i)))) {
+                    VEXTRINSxy(W, d0, d2, (i << 4)); // immediate bit 4+i clear: overwrite product i with zero
+                }
+            }
+            VSHUF4Ixy(W, d1, d0, 0b10110001); // d0[a,b,c,d] d1[b,a,d,c]
+            VFADDxy(S, d0, d0, d1);           // d0[ab,ba,cd,dc]
+            VSHUF4Ixy(W, d1, d0, 0b01001110); // d1[cd,dc,ab,ba]
+            VFADDxy(S, d0, d0, d1);           // d0[abcd,badc,cdab,dcba]
+            VREPLVEIxy(W, v0, d0, 0); // replicate the sum into v0; NOTE(review): with vex.l this must stay per 128-bit lane -- confirm the macro does so for index 0
+            for (int i = 0; i < 4; ++i) {
+                if (!(u8 & (1 << i))) {
+                    VEXTRINSxy(W, v0, d2, (i << 4)); // immediate bit i clear: zero result element i
+                }
+            }
+            break;
+        case 0x41: // dot product of packed doubles, with product and result selection taken from the immediate
+            INST_NAME("VDPPD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 1);
+            u8 = F8; // immediate: bits 4-5 are tested below to select products, bits 0-1 to select result elements
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            d2 = fpu_get_scratch(dyn);
+            VFMULxy(D, d0, v1, v2);
+            VXOR_Vxy(d2, d2, d2); // d2 = all zeroes, the source for every masked-out element
+            for (int i = 0; i < 2; ++i) {
+                if (!(u8 & (1 << (4 + i)))) {
+                    VEXTRINSxy(D, d0, d2, (i << 4)); // immediate bit 4+i clear: overwrite product i with zero
+                }
+            }
+            VSHUF4Ixy(W, d1, d0, 0b01001110); // d0[a,b] d1[b,a] (word shuffle that swaps the two 64-bit halves)
+            VFADDxy(D, d0, d0, d1);           // d0[ab,ba]
+            VREPLVEIxy(D, v0, d0, 0); // replicate the sum held in element 0 into v0
+            for (int i = 0; i < 2; ++i) {
+                if (!(u8 & (1 << i))) {
+                    VEXTRINSxy(D, v0, d2, (i << 4)); // immediate bit i clear: zero result element i
+                }
+            }
+            break;
         case 0x42:
             INST_NAME("VMPSADBW Gx, Vx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 446ed244..2f492c85 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -206,6 +206,48 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VEXTRINSxy(D, v0, d0, VEXTRINS_IMM_4_0(0, 0));
             }
             break;
+        case 0x7C: // horizontal add of packed singles: split the operands into even/odd elements, then add them
+            INST_NAME("VHADDPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0); // presumably v0 = Gx (dest), v1 = Vx, v2 = Ex -- confirm against the macro
+            q0 = fpu_get_scratch(dyn);
+            VPICKEVxy(W, q0, v2, v1); // q0 <- even-indexed singles gathered from v2 and v1
+            VPICKODxy(W, v0, v2, v1); // v0 <- odd-indexed singles; q0[i] and v0[i] now form one horizontal pair
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMPxy(S, d0, q0, v0, cUN); // d0 = mask of pairs where an input is already NaN (unordered compare)
+            }
+            VFADDxy(S, v0, q0, v0);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                VFCMPxy(S, d1, v0, v0, cUN); // d1 = mask of elements whose result is NaN
+                VANDN_Vxy(d0, d0, d1); // d0 = ~d0 & d1: NaN created by the add itself, not propagated from an input
+                VLDIxy(d1, (0b010 << 9) | 0b1111111100);
+                VSLLIxy(W, d1, d1, 20); // broadcast 0xFFC00000
+                VBITSEL_Vxy(v0, v0, d1, d0); // where d0 is set, replace the result with the constant built in d1
+            }
+            break;
+        case 0x7D: // horizontal subtract of packed singles: even element minus odd element of each pair
+            INST_NAME("VHSUBPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0); // presumably v0 = Gx (dest), v1 = Vx, v2 = Ex -- confirm against the macro
+            q0 = fpu_get_scratch(dyn);
+            VPICKEVxy(W, q0, v2, v1); // q0 <- even-indexed singles gathered from v2 and v1 (the minuends)
+            VPICKODxy(W, v0, v2, v1); // v0 <- odd-indexed singles (the subtrahends)
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMPxy(S, d0, q0, v0, cUN); // d0 = mask of pairs where an input is already NaN (unordered compare)
+            }
+            VFSUBxy(S, v0, q0, v0);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                VFCMPxy(S, d1, v0, v0, cUN); // d1 = mask of elements whose result is NaN
+                VANDN_Vxy(d0, d0, d1); // d0 = ~d0 & d1: NaN created by the subtract itself, not propagated from an input
+                VLDIxy(d1, (0b010 << 9) | 0b1111111100);
+                VSLLIxy(W, d1, d1, 20); // broadcast 0xFFC00000
+                VBITSEL_Vxy(v0, v0, d1, d0); // where d0 is set, replace the result with the constant built in d1
+            }
+            break;
         case 0xD0:
             INST_NAME("VADDSUBPS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index 2e51ce90..f2ea3acd 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -104,7 +104,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             INST_NAME("VADDSS Gx, Vx, Ex");
             nextop = F8;
             GETVYx(v1, 0);
-            GETEYSD(v2, 0, 0);
+            GETEYSS(v2, 0, 0);
             GETGYx_empty(v0);
             d0 = fpu_get_scratch(dyn);
             FADD_S(d0, v1, v2);
@@ -123,7 +123,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             INST_NAME("VMULSS Gx, Vx, Ex");
             nextop = F8;
             GETVYx(v1, 0);
-            GETEYSD(v2, 0, 0);
+            GETEYSS(v2, 0, 0);
             GETGYx_empty(v0);
             d0 = fpu_get_scratch(dyn);
             FMUL_S(d0, v1, v2);
@@ -142,7 +142,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             INST_NAME("VSUBSS Gx, Vx, Ex");
             nextop = F8;
             GETVYx(v1, 0);
-            GETEYSD(v2, 0, 0);
+            GETEYSS(v2, 0, 0);
             GETGYx_empty(v0);
             d0 = fpu_get_scratch(dyn);
             FSUB_S(d0, v1, v2);
@@ -161,7 +161,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             INST_NAME("VDIVSS Gx, Vx, Ex");
             nextop = F8;
             GETVYx(v1, 0);
-            GETEYSD(v2, 0, 0);
+            GETEYSS(v2, 0, 0);
             GETGYx_empty(v0);
             d0 = fpu_get_scratch(dyn);
             FDIV_S(d0, v1, v2);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 42899386..57feffcf 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -2132,6 +2132,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVFRSTPI_H(xd, xj, imm5)     EMIT(type_2RI5(0b01110110100110101, imm5, xj, xd))
 #define XVLDI(xd, imm13)             EMIT(type_1RI13(0b01110111111000, imm13, xd))
 #define XVSHUF_B(xd, xj, xk, xa)     EMIT(type_4R(0b000011010110, xa, xk, xj, xd))
+#define XVREPLVE_B(xd, xj, rk)       EMIT(type_3R(0b01110101001000100, rk, xj, xd)) // XVREPLVE.{B,H,W,D}: the element index is read from general register rk
+#define XVREPLVE_H(xd, xj, rk)       EMIT(type_3R(0b01110101001000101, rk, xj, xd)) // the low two opcode bits select the width: 00=B 01=H 10=W 11=D
+#define XVREPLVE_W(xd, xj, rk)       EMIT(type_3R(0b01110101001000110, rk, xj, xd))
+#define XVREPLVE_D(xd, xj, rk)       EMIT(type_3R(0b01110101001000111, rk, xj, xd))
+#define XVREPLGR2VR_B(xd, rj)        EMIT(type_2R(0b0111011010011111000000, rj, xd)) // XVREPLGR2VR.{B,H,W,D}: the source is general register rj
+#define XVREPLGR2VR_H(xd, rj)        EMIT(type_2R(0b0111011010011111000001, rj, xd)) // same width selection in the low two opcode bits
+#define XVREPLGR2VR_W(xd, rj)        EMIT(type_2R(0b0111011010011111000010, rj, xd))
+#define XVREPLGR2VR_D(xd, rj)        EMIT(type_2R(0b0111011010011111000011, rj, xd))
 
 #define XVFMADD_S(xd, xj, xk, xa)  EMIT(type_4R(0b000010100001, xa, xk, xj, xd))
 #define XVFMSUB_S(xd, xj, xk, xa)  EMIT(type_4R(0b000010100101, xa, xk, xj, xd))
@@ -3198,4 +3206,17 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                               \
     } while (0)
 
+#define VREPLVEIxy(width, vd, vj, imm)                                                                    \
+    do { /* replicate element `imm` of vj across vd; with vex.l each 128-bit lane uses its own element */ \
+        if (vex.l) {                                                                                      \
+            if ((imm) > 0) {                                                                              \
+                ADDI_D(x5, xZR, (imm)); /* XVREPLVE takes its index in a register; x5 is clobbered */     \
+                XVREPLVE_##width(vd, vj, x5);                                                             \
+            } else { /* index 0 comes from xZR; XVREPLVE0 would copy the low lane's element to both */    \
+                XVREPLVE_##width(vd, vj, xZR);                                                            \
+            }                                                                                             \
+        } else {                                                                                          \
+            VREPLVEI_##width(vd, vj, (imm));                                                              \
+        }                                                                                                 \
+    } while (0)
 #endif //__ARM64_EMITTER_H__
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 6e21b93e..39d4c612 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -7524,6 +7524,38 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %s", "XVFNMSUB.D", XVt[Rd], XVt[Rj], XVt[Rk], XVt[Ra]);
         return buff;
     }
+    if (isMask(opcode, "01110101001000100kkkkkjjjjjddddd", &a)) { // XVREPLVE.<width>: the four patterns differ only in the two bits just above the k field
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.B", XVt[Rd], XVt[Rj], Xt[Rk]);
+        return buff;
+    }
+    if (isMask(opcode, "01110101001000101kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.H", XVt[Rd], XVt[Rj], Xt[Rk]);
+        return buff;
+    }
+    if (isMask(opcode, "01110101001000110kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.W", XVt[Rd], XVt[Rj], Xt[Rk]);
+        return buff;
+    }
+    if (isMask(opcode, "01110101001000111kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.D", XVt[Rd], XVt[Rj], Xt[Rk]);
+        return buff;
+    }
+    if (isMask(opcode, "0111011010011111000000jjjjjddddd", &a)) { // XVREPLGR2VR.<width>: the four patterns differ only in the two bits just above the j field
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.B", XVt[Rd], Xt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111011010011111000001jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.H", XVt[Rd], Xt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111011010011111000010jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.W", XVt[Rd], Xt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111011010011111000011jjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.D", XVt[Rd], Xt[Rj]);
+        return buff;
+    }
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
 }