 src/dynarec/rv64/dynarec_rv64_avx_0f.c    | 85 ++++++++++++
 src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c | 75 +++++++++
 2 files changed, 160 insertions(+), 0 deletions(-)
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_0f.c
index ec68be6e..e1cdf180 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_0f.c
@@ -559,6 +559,91 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in
             } else
                 YMM0(gd);
             break;
+        case 0xC2:
+            INST_NAME("VCMPPS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETEX(x2, 1, vex.l ? 28 : 12);
+            GETGX();
+            GETVX();
+            GETGY();
+            GETVY();
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            for (int i = 0; i < 4 + (vex.l ? 4 : 0); ++i) {
+                if (i == 4) { GETEY(); }
+                if (i < 4) {
+                    FLW(d0, vback, vxoffset + 4 * i);
+                    FLW(d1, wback, fixedaddress + 4 * i);
+                } else {
+                    FLW(d0, vback, vyoffset + 4 * (i - 4));
+                    FLW(d1, wback, fixedaddress + 4 * (i - 4));
+                }
+
+                if ((u8 & 0xf) != 0x0b && (u8 & 0xf) != 0xf) {
+                    // x6 = !(isnan(d0) || isnan(d1))
+                    FEQS(x4, d0, d0);
+                    FEQS(x3, d1, d1);
+                    AND(x6, x3, x4);
+                }
+
+                switch (u8 & 0x7) {
+                    case 0:
+                        FEQS(x3, d0, d1);
+                        break; // Equal
+                    case 1:
+                        BEQ(x6, xZR, 8); // NaN operand(s): skip FLT (would raise invalid), fixed up below
+                        FLTS(x3, d0, d1);
+                        break; // Less than
+                    case 2:
+                        BEQ(x6, xZR, 8);
+                        FLES(x3, d0, d1);
+                        break; // Less or equal
+                    case 3:
+                        if (u8 & 0x8)
+                            ADDI(x3, xZR, 0);
+                        else
+                            XORI(x3, x6, 1);
+                        break; // Unordered
+                    case 4:
+                        FEQS(x3, d0, d1);
+                        XORI(x3, x3, 1);
+                        break; // Not Equal or unordered
+                    case 5:
+                        BEQ(x6, xZR, 12);
+                        FLES(x3, d1, d0);
+                        J(8);
+                        ADDI(x3, xZR, 1);
+                        break; // Greater or equal or unordered
+                    case 6:
+                        BEQ(x6, xZR, 12);
+                        FLTS(x3, d1, d0);
+                        J(8);
+                        ADDI(x3, xZR, 1);
+                        break; // Greater or unordered
+                    case 7:
+                        if (u8 & 0x8)
+                            ADDI(x3, xZR, 1);
+                        else
+                            MV(x3, x6);
+                        break; // Not NaN
+                }
+                if ((u8 & 0x3) != 0x3) { // NaN fixup, except for UNORD/ORD/FALSE/TRUE
+                    if ((u8 & 0xC) == 0x8 || (u8 & 0xC) == 0x4) {
+                        XORI(x7, x6, 1);
+                        OR(x3, x3, x7);
+                    } else
+                        AND(x3, x3, x6);
+                }
+                NEG(x3, x3); // 0/1 -> all-zeros/all-ones lane mask
+                if (i < 4) {
+                    SW(x3, gback, gdoffset + 4 * i);
+                } else {
+                    SW(x3, gback, gyoffset + 4 * (i - 4));
+                }
+            }
+            if (!vex.l) YMM0(gd);
+            break;
         default:
             DEFAULT;
     }
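For reference, the predicate handling emitted above matches this scalar, one-lane C model (a sketch from reading the generated code, not box64 source; the function name vcmpps_lane is made up):

#include <math.h>
#include <stdint.h>

// One VCMPPS lane: the low 3 bits of the immediate pick the base compare,
// bit 3 only matters for the constant FALSE/TRUE encodings (cases 3 and 7),
// and bits 2-3 decide whether a NaN operand forces the lane to 1 or 0.
static uint32_t vcmpps_lane(float a, float b, uint8_t u8)
{
    int ord = !(isnan(a) || isnan(b)); // x6 in the dynarec
    int r = 0;
    switch (u8 & 0x7) {
        case 0: r = a == b; break;                // EQ (FEQS is quiet on NaN)
        case 1: r = ord && a < b; break;          // LT (FLT guarded by BEQ)
        case 2: r = ord && a <= b; break;         // LE
        case 3: r = (u8 & 0x8) ? 0 : !ord; break; // FALSE / UNORD
        case 4: r = !(a == b); break;             // NEQ
        case 5: r = ord ? b <= a : 1; break;      // NLT: a >= b, or unordered
        case 6: r = ord ? b < a : 1; break;       // NLE: a > b, or unordered
        case 7: r = (u8 & 0x8) ? 1 : ord; break;  // TRUE / ORD
    }
    if ((u8 & 0x3) != 0x3) {           // NaN fixup, except UNORD/ORD/FALSE/TRUE
        if ((u8 & 0xC) == 0x8 || (u8 & 0xC) == 0x4)
            r = r || !ord;             // unordered predicates: NaN => true
        else
            r = r && ord;              // ordered predicates: NaN => false
    }
    return r ? 0xFFFFFFFFu : 0u;       // the NEG(x3, x3) mask
}

The same decode drives the VCMPSD case below, with FEQD/FLTD/FLED and a single 64-bit store.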
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c
index 70ace058..9315bbb8 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c
@@ -266,6 +266,81 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             YMM0(gd);
             break;
+        case 0xC2:
+            INST_NAME("VCMPSD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETEX(x2, 1, 1);
+            GETGX();
+            GETVX();
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            FLD(d0, vback, vxoffset);
+            FLD(d1, wback, fixedaddress);
+
+            if ((u8 & 0xf) != 0x0b && (u8 & 0xf) != 0xf) {
+                // x6 = !(isnan(d0) || isnan(d1))
+                FEQD(x4, d0, d0);
+                FEQD(x3, d1, d1);
+                AND(x6, x3, x4);
+            }
+
+            switch (u8 & 0x7) {
+                case 0:
+                    FEQD(x3, d0, d1);
+                    break; // Equal
+                case 1:
+                    BEQ(x6, xZR, 8);
+                    FLTD(x3, d0, d1);
+                    break; // Less than
+                case 2:
+                    BEQ(x6, xZR, 8);
+                    FLED(x3, d0, d1);
+                    break; // Less or equal
+                case 3:
+                    if (u8 & 0x8)
+                        ADDI(x3, xZR, 0);
+                    else
+                        XORI(x3, x6, 1);
+                    break; // Unordered
+                case 4:
+                    FEQD(x3, d0, d1);
+                    XORI(x3, x3, 1);
+                    break; // Not Equal or unordered
+                case 5:
+                    BEQ(x6, xZR, 12);
+                    FLED(x3, d1, d0);
+                    J(8);
+                    ADDI(x3, xZR, 1);
+                    break; // Greater or equal or unordered
+                case 6:
+                    BEQ(x6, xZR, 12);
+                    FLTD(x3, d1, d0);
+                    J(8);
+                    ADDI(x3, xZR, 1);
+                    break; // Greater or unordered
+                case 7:
+                    if (u8 & 0x8)
+                        ADDI(x3, xZR, 1);
+                    else
+                        MV(x3, x6);
+                    break; // Not NaN
+            }
+            if ((u8 & 0x3) != 0x3) {
+                if ((u8 & 0xC) == 0x8 || (u8 & 0xC) == 0x4) {
+                    XORI(x7, x6, 1);
+                    OR(x3, x3, x7);
+                } else
+                    AND(x3, x3, x6);
+            }
+            NEG(x3, x3); // 0/1 -> all-zeros/all-ones 64-bit mask
+            SD(x3, gback, gdoffset);
+            if (gd != vex.v) { // copy Vx[127:64] into Gx, unless they are the same register
+                LD(x3, vback, vxoffset + 8);
+                SD(x3, gback, gdoffset + 8);
+            }
+            YMM0(gd);
+            break;
         default:
             DEFAULT;
     }
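The scalar store sequence above only turns the low double into a mask; the rest of the destination follows the usual VEX merge rule. A minimal sketch of that merge, assuming a mask produced by the same decode as the float model earlier:

#include <stdint.h>

typedef struct { uint64_t q[2]; } xmm_t;

// Hypothetical model of the VCMPSD destination write: only Gx[63:0] becomes
// the compare mask; Gx[127:64] is copied from Vx (the gd != vex.v test just
// skips the copy when Gx and Vx are the same register), and YMM0() then
// zeroes the upper ymm half.
static xmm_t vcmpsd_merge(xmm_t vx, uint64_t mask /* 0 or ~0ull from the decode */)
{
    xmm_t gx;
    gx.q[0] = mask;    // SD(x3, gback, gdoffset)
    gx.q[1] = vx.q[1]; // LD/SD pair under gd != vex.v
    return gx;
}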