author     phorcys <phorcys@126.com>      2025-07-29 23:29:59 +0800
committer  GitHub <noreply@github.com>    2025-07-29 17:29:59 +0200
commit     5fd843750fe1af2b8775f0e491cd07d1cf06fe77 (patch)
tree       2f6e4d62d3fd5d936de7f44e433bc842d98d1307
parent     8180cb321818abcec2ffe41d908c3095380e79b1 (diff)
[LA64_DYNAREC] Add la64 avx int cmp ops. (#2856)
VCMP{EQ,GT}{B,W,D,Q}
VCOMISS, VUCOMISS, VCOMISD, VUCOMISD
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_0f.c       41
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f.c    76
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f38.c  24
-rw-r--r--  src/dynarec/la64/la64_emitter.h              12
4 files changed, 146 insertions, 7 deletions
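
For reference, the x86 flag behaviour that the new 0x2E/0x2F handlers (VUCOMISS/VCOMISS and VUCOMISD/VCOMISD) reproduce can be modelled in a few lines of host C. This is only an illustrative sketch of the architectural semantics, not box64 code; the function name and the use of raw x86 EFLAGS bit positions are assumptions made here for clarity:

    #include <math.h>
    #include <stdint.h>

    /* x86 EFLAGS bit positions for CF, PF, ZF (standard layout). */
    #define CF_BIT (1u << 0)
    #define PF_BIT (1u << 2)
    #define ZF_BIT (1u << 6)

    /* (U)COMISS/(U)COMISD: compare two scalars and set ZF/PF/CF.
     * Unordered (a NaN operand) -> ZF=PF=CF=1; less-than -> CF=1;
     * equal -> ZF=1; greater-than -> all three clear. OF/SF/AF are cleared. */
    static uint32_t comis_flags(double a, double b)
    {
        if (isnan(a) || isnan(b))
            return ZF_BIT | PF_BIT | CF_BIT;
        if (a < b)
            return CF_BIT;
        if (a == b)
            return ZF_BIT;
        return 0;
    }

The three FCMP branches in the hunks below (cUN, cLT, cEQ) walk this same decision ladder, ORing the matching bits into xFlags and then falling through to the final flag spill.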
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c
index cb1921a5..8520045d 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f.c
@@ -229,6 +229,47 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 SMWRITE2();
             }
             break;
+        case 0x2E:
+            // no special check...
+        case 0x2F:
+            if (opcode == 0x2F) {
+                INST_NAME("VCOMISS Gx, Ex");
+            } else {
+                INST_NAME("VUCOMISS Gx, Ex");
+            }
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            SET_DFNONE();
+            nextop = F8;
+            GETGYx(d0, 0);
+            GETEYSS(v0, 0, 0);
+            CLEAR_FLAGS(x2);
+            // if isnan(d0) || isnan(v0)
+            IFX (X_ZF | X_PF | X_CF) {
+                FCMP_S(fcc0, d0, v0, cUN);
+                BCEQZ_MARK(fcc0);
+                ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
+                B_MARK3_nocond;
+            }
+            MARK;
+            // else if isless(d0, v0)
+            IFX (X_CF) {
+                FCMP_S(fcc1, d0, v0, cLT);
+                BCEQZ_MARK2(fcc1);
+                ORI(xFlags, xFlags, 1 << F_CF);
+                B_MARK3_nocond;
+            }
+            MARK2;
+            // else if d0 == v0
+            IFX (X_ZF) {
+                FCMP_S(fcc2, d0, v0, cEQ);
+                BCEQZ_MARK3(fcc2);
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            MARK3;
+            IFX (X_ALL) {
+                SPILL_EFLAGS();
+            }
+            break;
         case 0x50:
             nextop = F8;
             INST_NAME("VMOVMSKPS Gd, Ex");
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 977419e1..63b2469e 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -223,6 +223,46 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x2E:
+            // no special check...
+        case 0x2F:
+            if (opcode == 0x2F) {
+                INST_NAME("VCOMISD Gx, Ex");
+            } else {
+                INST_NAME("VUCOMISD Gx, Ex");
+            }
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            SET_DFNONE();
+            nextop = F8;
+            GETGYx(d0, 0);
+            GETEYSD(v0, 0, 0);
+
+            CLEAR_FLAGS(x3);
+            // if isnan(d0) || isnan(v0)
+            IFX (X_ZF | X_PF | X_CF) {
+                FCMP_D(fcc0, d0, v0, cUN);
+                BCEQZ_MARK(fcc0);
+                ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
+                B_MARK3_nocond;
+            }
+            MARK;
+            // else if isless(d0, v0)
+            IFX (X_CF) {
+                FCMP_D(fcc1, d0, v0, cLT);
+                BCEQZ_MARK2(fcc1);
+                ORI(xFlags, xFlags, 1 << F_CF);
+                B_MARK3_nocond;
+            }
+            MARK2;
+            // else if d0 == v0
+            IFX (X_ZF) {
+                FCMP_D(fcc2, d0, v0, cEQ);
+                BCEQZ_MARK3(fcc2);
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            MARK3;
+            SPILL_EFLAGS();
+            break;
         case 0x50:
             nextop = F8;
             INST_NAME("VMOVMSKPD Gd, Ex");
@@ -410,6 +450,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VPICKEVxy(B, v0, q0, v0);
             }
             break;
+        case 0x64:
+            INST_NAME("VPCMPGTB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSLTxy(B, v0, v2, v1);
+            break;
+        case 0x65:
+            INST_NAME("VPCMPGTW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSLTxy(H, v0, v2, v1);
+            break;
+        case 0x66:
+            INST_NAME("VPCMPGTD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSLTxy(W, v0, v2, v1);
+            break;
         case 0x67:
             INST_NAME("VPACKUSWB Gx, Vx, Ex");
             nextop = F8;
@@ -646,6 +704,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                     DEFAULT;
             }
             break;
+        case 0x74:
+            INST_NAME("VPCMPEQB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSEQxy(B, v0, v1, v2);
+            break;
+        case 0x75:
+            INST_NAME("VPCMPEQW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSEQxy(H, v0, v1, v2);
+            break;
+        case 0x76:
+            INST_NAME("VPCMPEQD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSEQxy(W, v0, v1, v2);
+            break;
         case 0x7C:
             INST_NAME("VHADDPD Gx, Vx, Ex");
             nextop = F8;
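
A note on operand order in the new VPCMPGT cases above: LSX/LASX provide "set if equal" (VSEQ) and "set if signed less than" (VSLT), but no direct "greater than", so the handlers pass the sources swapped. With v1 = Vx and v2 = Ex, VSLTxy(..., v0, v2, v1) computes Ex < Vx per lane, which is the same predicate as Vx > Ex. A standalone C model of the byte case (hypothetical helper name; the W/D/Q lane widths follow the same pattern):

    #include <stdint.h>

    /* Per-lane model of the 128-bit VPCMPGTB: dst lane = all-ones if v > e
     * (signed compare, as PCMPGT is), else zero. Written as "e < v" to mirror
     * the swapped-operand VSLT.B emitted by the handler. */
    static void vpcmpgtb_model(int8_t dst[16], const int8_t v[16], const int8_t e[16])
    {
        for (int i = 0; i < 16; i++)
            dst[i] = (e[i] < v[i]) ? (int8_t)0xFF : 0;
    }

The VPCMPEQ cases need no such swap since equality is symmetric, which is why VSEQxy appears with both (v0, v1, v2) and (v0, v2, v1) operand orders across the hunks.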
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 26808486..aa832385 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -339,6 +339,12 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMULWEVxy(D_W, v0, v1, v2);
             break;
+        case 0x29:
+            INST_NAME("VPCMPEQQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSEQxy(D, v0, v2, v1);
+            break;
         case 0x2B:
             INST_NAME("VPACKUSDW Gx, Vx, Ex");
             nextop = F8;
@@ -507,6 +513,12 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             XVPERM_W(v0, v2, v1);
             break;
+        case 0x37:
+            INST_NAME("VPCMPGTQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSLTxy(D, v0, v2, v1);
+            break;
         case 0x38:
             INST_NAME("VPMINSB Gx, Vx, Ex");
             nextop = F8;
@@ -872,7 +884,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             VFMADDxyxw(q0, v0, v2, v1);
             VFMSUBxyxw(v0, v0, v2, v1);
-            if(rex.w){
+            if (rex.w) {
                 VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1));
             } else {
                 VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1));
@@ -886,7 +898,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             VFMSUBxyxw(q0, v0, v2, v1);
             VFMADDxyxw(v0, v0, v2, v1);
-            if(rex.w){
+            if (rex.w) {
                 VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1));
             } else {
                 VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1));
@@ -956,7 +968,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             VFMADDxyxw(q0, v1, v0, v2);
             VFMSUBxyxw(v0, v1, v0, v2);
-            if(rex.w){
+            if (rex.w) {
                 VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1));
             } else {
                 VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1));
@@ -970,7 +982,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             VFMSUBxyxw(q0, v1, v0, v2);
             VFMADDxyxw(v0, v1, v0, v2);
-            if(rex.w){
+            if (rex.w) {
                 VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1));
             } else {
                 VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1));
@@ -1040,7 +1052,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             VFMADDxyxw(q0, v1, v2, v0);
             VFMSUBxyxw(v0, v1, v2, v0);
-            if(rex.w){
+            if (rex.w) {
                 VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1));
             } else {
                 VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1));
@@ -1054,7 +1066,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             VFMSUBxyxw(q0, v1, v2, v0);
             VFMADDxyxw(v0, v1, v2, v0);
-            if(rex.w){
+            if (rex.w) {
                 VEXTRINSxy(D, v0, q0, VEXTRINS_IMM_4_0(1, 1));
             } else {
                 VEXTRINSxy(W, v0, q0, VEXTRINS_IMM_4_0(1, 1));
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 0c49824b..aa5399b9 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -3295,4 +3295,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
             VSEQ_##width(vd, vj, vk);  \
         }                              \
     } while (0)
-#endif //__ARM64_EMITTER_H__
+
+#define VSLTxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLT_##width(vd, vj, vk); \
+        } else {                       \
+            VSLT_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#endif //__LA64_EMITTER_H__
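
The new VSLTxy helper, like the existing VSEQxy just above it, selects the 256-bit LASX encoding (XVSLT_*) when vex.l is set and the 128-bit LSX encoding (VSLT_*) otherwise, so the only visible difference between the two forms is the number of lanes processed. A rough model for the 64-bit-element case, with a hypothetical lanes argument standing in for the vex.l choice:

    #include <stddef.h>
    #include <stdint.h>

    /* Model of VSLT.D (lanes == 2, 128-bit) vs XVSLT.D (lanes == 4, 256-bit):
     * per-lane signed "less than" producing an all-ones / all-zeros mask. */
    static void vslt_d_model(int64_t *vd, const int64_t *vj, const int64_t *vk, size_t lanes)
    {
        for (size_t i = 0; i < lanes; i++)
            vd[i] = (vj[i] < vk[i]) ? -1 : 0;
    }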