about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: phorcys <phorcys@126.com>, 2025-07-29 15:55:40 +0800
committer: GitHub <noreply@github.com>, 2025-07-29 09:55:40 +0200
commit: 0734efea394ad1086f33a5e01df6aab900f01b19 (patch)
tree: 32a2b6aedc4ea1258a43f908bad5739634adce1f
parent: f43a221ffca63e96e33301148df240a91df0d3c2 (diff)
download: box64-0734efea394ad1086f33a5e01df6aab900f01b19.tar.gz
box64-0734efea394ad1086f33a5e01df6aab900f01b19.zip
[LA64_DYNAREC] Add la64 avx float cmp ops. (#2854)
VCMPPD,VCMPPS,VCMPSD,VCMPSS
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_0f.c 24
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f.c 24
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f2_0f.c 44
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f3_0f.c 49
-rw-r--r-- src/dynarec/la64/la64_emitter.h 8
5 files changed, 132 insertions, 17 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c
index f9211fbe..c0302d45 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f.c
@@ -472,6 +472,30 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                         DEFAULT;
                 }
             break;
+        case 0xC2:
+            INST_NAME("VCMPPS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 1);
+            u8 = F8;
+            switch (u8 & 0xf) {
+                case 0x00: VFCMPxy(S, v0, v1, v2, cEQ); break;  // Equal, not unordered
+                case 0x01: VFCMPxy(S, v0, v1, v2, cLT); break;  // Less than
+                case 0x02: VFCMPxy(S, v0, v1, v2, cLE); break;  // Less or equal
+                case 0x03: VFCMPxy(S, v0, v1, v2, cUN); break;  // unordered
+                case 0x04: VFCMPxy(S, v0, v1, v2, cUNE); break; // Not equal, or unordered
+                case 0x05: VFCMPxy(S, v0, v2, v1, cULE); break; // Greater or equal or unordered
+                case 0x06: VFCMPxy(S, v0, v2, v1, cULT); break; // Greater or unordered
+                case 0x07: VFCMPxy(S, v0, v1, v2, cOR); break;  // Ordered
+                case 0x08: VFCMPxy(S, v0, v1, v2, cUEQ); break; // Equal, or unordered
+                case 0x09: VFCMPxy(S, v0, v1, v2, cULT); break; // Less than or unordered
+                case 0x0a: VFCMPxy(S, v0, v1, v2, cULE); break; // Less or equal or unordered
+                case 0x0b: XVXOR_V(v0, v0, v0); break;          // false
+                case 0x0c: VFCMPxy(S, v0, v1, v2, cNE); break;  // Not Equal, ordered
+                case 0x0d: VFCMPxy(S, v0, v2, v1, cLE); break;  // Greater or Equal ordered
+                case 0x0e: VFCMPxy(S, v0, v2, v1, cLT); break;  // Greater ordered
+                case 0x0f: VSEQxy(B, v0, v1, v1); break;        // true
+            }
+            break;
         case 0xC6:
             INST_NAME("VSHUFPS Gx, Vx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 6ae03e3c..977419e1 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -730,6 +730,30 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0xC2:
+            INST_NAME("VCMPPD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 1);
+            u8 = F8;
+            switch (u8 & 0xf) {
+                case 0x00: VFCMPxy(D, v0, v1, v2, cEQ); break;  // Equal, not unordered
+                case 0x01: VFCMPxy(D, v0, v1, v2, cLT); break;  // Less than
+                case 0x02: VFCMPxy(D, v0, v1, v2, cLE); break;  // Less or equal
+                case 0x03: VFCMPxy(D, v0, v1, v2, cUN); break;  // unordered
+                case 0x04: VFCMPxy(D, v0, v1, v2, cUNE); break; // Not equal, or unordered
+                case 0x05: VFCMPxy(D, v0, v2, v1, cULE); break; // Greater or equal or unordered
+                case 0x06: VFCMPxy(D, v0, v2, v1, cULT); break; // Greater or unordered
+                case 0x07: VFCMPxy(D, v0, v1, v2, cOR); break;  // Ordered
+                case 0x08: VFCMPxy(D, v0, v1, v2, cUEQ); break; // Equal, or unordered
+                case 0x09: VFCMPxy(D, v0, v1, v2, cULT); break; // Less than or unordered
+                case 0x0a: VFCMPxy(D, v0, v1, v2, cULE); break; // Less or equal or unordered
+                case 0x0b: XVXOR_V(v0, v0, v0); break;          // false
+                case 0x0c: VFCMPxy(D, v0, v1, v2, cNE); break;  // Not Equal, ordered
+                case 0x0d: VFCMPxy(D, v0, v2, v1, cLE); break;  // Greater or Equal ordered
+                case 0x0e: VFCMPxy(D, v0, v2, v1, cLT); break;  // Greater ordered
+                case 0x0f: VSEQxy(B, v0, v1, v1); break;        // true
+            }
+            break;
         case 0xC6:
             INST_NAME("VSHUFPD Gx, Vx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index 84cbe5e3..afe0b086 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -131,7 +131,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 BCEQZ(fcc0, 4 + 4);
                 FNEG_D(d1, d1);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d1, 0);
             break;
         case 0x58:
@@ -150,7 +150,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
         case 0x59:
@@ -169,7 +169,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
         case 0x5C:
@@ -188,7 +188,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
         case 0x5D:
@@ -204,7 +204,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FCMP_D(fcc0, v2, v1, cULE);
                 FSEL(q0, v1, v2, fcc0);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, q0, 0);
             break;
         case 0x5E:
@@ -223,7 +223,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_D(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
         case 0x5F:
@@ -239,7 +239,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FCMP_D(fcc0, v2, v1, cLT);
                 FSEL(q0, v2, v1, fcc0);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, q0, 0);
             break;
         case 0x70:
@@ -298,6 +298,36 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0xC2:
+            INST_NAME("VCMPSD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSD(v2, 0, 1);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            u8 = F8;
+            switch (u8 & 0xf) {
+                case 0x00: VFCMP_D(q0, v1, v2, cEQ); break;  // Equal, not unordered
+                case 0x01: VFCMP_D(q0, v1, v2, cLT); break;  // Less than
+                case 0x02: VFCMP_D(q0, v1, v2, cLE); break;  // Less or equal
+                case 0x03: VFCMP_D(q0, v1, v2, cUN); break;  // unordered
+                case 0x04: VFCMP_D(q0, v1, v2, cUNE); break; // Not equal, or unordered
+                case 0x05: VFCMP_D(q0, v2, v1, cULE); break; // Greater or equal or unordered
+                case 0x06: VFCMP_D(q0, v2, v1, cULT); break; // Greater or unordered
+                case 0x07: VFCMP_D(q0, v1, v2, cOR); break;  // Ordered
+                case 0x08: VFCMP_D(q0, v1, v2, cUEQ); break; // Equal, or unordered
+                case 0x09: VFCMP_D(q0, v1, v2, cULT); break; // Less than or unordered
+                case 0x0a: VFCMP_D(q0, v1, v2, cULE); break; // Less or equal or unordered
+                case 0x0b: VXOR_V(q0, q0, q0); break;        // false
+                case 0x0c: VFCMP_D(q0, v1, v2, cNE); break;  // Not Equal, ordered
+                case 0x0d: VFCMP_D(q0, v2, v1, cLE); break;  // Greater or Equal ordered
+                case 0x0e: VFCMP_D(q0, v2, v1, cLT); break;  // Greater ordered
+                case 0x0f: VSEQ_B(q0, v1, v1); break;        // true
+            }
+            XVXOR_V(v0, v0, v0);
+            XVINSVE0_D(v0, q0, 0);
+            YMM_UNMARK_UPPER_ZERO(v0);
+            break;
         case 0xD0:
             INST_NAME("VADDSUBPS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index a61dcbb7..ebe56672 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -115,7 +115,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 BCEQZ(fcc0, 4 + 4);
                 FNEG_S(d1, d1);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d1, 0);
             break;
         case 0x52:
@@ -130,7 +130,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else {
                 FRSQRT_S(d0, v1);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x53:
@@ -145,7 +145,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else {
                 FRECIP_S(d0, v1);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x58:
@@ -164,7 +164,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x59:
@@ -183,7 +183,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x5C:
@@ -202,7 +202,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x5D:
@@ -218,7 +218,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FCMP_S(fcc0, v2, v1, cULE);
                 FSEL(q0, v1, v2, fcc0);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, q0, 0);
             break;
         case 0x5E:
@@ -237,7 +237,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FNEG_S(d0, d0);
             }
             MARK;
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
         case 0x5F:
@@ -253,7 +253,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 FCMP_S(fcc0, v2, v1, cLT);
                 FSEL(q0, v2, v1, fcc0);
             }
-            if(v0 != v1) VOR_V(v0, v1, v1);
+            if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, q0, 0);
             break;
         case 0x6F:
@@ -320,7 +320,36 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
-
+        case 0xC2:
+            INST_NAME("VCMPSS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSS(v2, 0, 1);
+            GETGYx_empty(v0);
+            q0 = fpu_get_scratch(dyn);
+            u8 = F8;
+            switch (u8 & 0xf) {
+                case 0x00: VFCMP_S(q0, v1, v2, cEQ); break;  // Equal, not unordered
+                case 0x01: VFCMP_S(q0, v1, v2, cLT); break;  // Less than
+                case 0x02: VFCMP_S(q0, v1, v2, cLE); break;  // Less or equal
+                case 0x03: VFCMP_S(q0, v1, v2, cUN); break;  // unordered
+                case 0x04: VFCMP_S(q0, v1, v2, cUNE); break; // Not equal, or unordered
+                case 0x05: VFCMP_S(q0, v2, v1, cULE); break; // Greater or equal or unordered
+                case 0x06: VFCMP_S(q0, v2, v1, cULT); break; // Greater or unordered
+                case 0x07: VFCMP_S(q0, v1, v2, cOR); break;  // Ordered
+                case 0x08: VFCMP_S(q0, v1, v2, cUEQ); break; // Equal, or unordered
+                case 0x09: VFCMP_S(q0, v1, v2, cULT); break; // Less than or unordered
+                case 0x0a: VFCMP_S(q0, v1, v2, cULE); break; // Less or equal or unordered
+                case 0x0b: VXOR_V(q0, q0, q0); break;        // false
+                case 0x0c: VFCMP_S(q0, v1, v2, cNE); break;  // Not Equal, ordered
+                case 0x0d: VFCMP_S(q0, v2, v1, cLE); break;  // Greater or Equal ordered
+                case 0x0e: VFCMP_S(q0, v2, v1, cLT); break;  // Greater ordered
+                case 0x0f: VSEQ_B(q0, v1, v1); break;        // true
+            }
+            XVXOR_V(v0, v0, v0);
+            XVINSVE0_W(v0, q0, 0);
+            YMM_UNMARK_UPPER_ZERO(v0);
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index b6fe7b65..0c49824b 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -3287,4 +3287,12 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                                     \
     } while (0)
 
+#define VSEQxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSEQ_##width(vd, vj, vk); \
+        } else {                       \
+            VSEQ_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
 #endif //__ARM64_EMITTER_H__