about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-08-22 14:45:14 +0800
committer GitHub <noreply@github.com> 2025-08-22 08:45:14 +0200
commit 7c32cb24a05bff89b4b9ed4461ecb9e34fdf3d1d (patch)
tree d23584c4a867910eafc1a59820554a1a3e93d4b9
parent cea8f9435823d86ca3a2c2566ad52b2c71e2e69b (diff)
download box64-7c32cb24a05bff89b4b9ed4461ecb9e34fdf3d1d.tar.gz
box64-7c32cb24a05bff89b4b9ed4461ecb9e34fdf3d1d.zip
[RV64_DYNAREC] Added more scalar avx 66 0F opcodes (#2960)
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_avx_66_0f.c 225
2 files changed, 226 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index a4c929dd..0a37495c 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -305,7 +305,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGD;
             GETEX(x1, 0, 8);
-            MV(gd, xZR);
+            XOR(gd, gd, gd);
             for (int i = 0; i < 2; ++i) {
                 // GD->dword[0] |= ((EX->q[i]>>63)&1)<<i;
                 LD(x2, wback, fixedaddress + 8 * i);
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
index e16e54b0..4acef003 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
@@ -67,6 +67,231 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SD(xZR, gback, gyoffset + 8);
             }
             break;
+        case 0x29: // store form: copies the G operand (128 bits, or 256 bits when vex.l) into the E operand
+            INST_NAME("VMOVAPD Ex, Gx");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 24 : 8); // last argument is 24 when the upper half will also be addressed, 8 otherwise
+            GETGX();
+            GETGY();
+            LD(x3, gback, gdoffset + 0); // read 16 bytes from gback + gdoffset ...
+            LD(x4, gback, gdoffset + 8);
+            SD(x3, wback, fixedaddress + 0); // ... and write them to wback + fixedaddress
+            SD(x4, wback, fixedaddress + 8);
+            if (vex.l) {
+                GETEY(); // NOTE(review): presumably retargets wback/fixedaddress to the upper 128 bits of E - confirm in the macro
+                LD(x3, gback, gyoffset + 0); // second 16 bytes come from gback + gyoffset
+                LD(x4, gback, gyoffset + 8);
+                SD(x3, wback, fixedaddress + 0);
+                SD(x4, wback, fixedaddress + 8);
+            } else if (MODREG) {
+                GETEY();
+                SD(xZR, wback, fixedaddress + 0); // not vex.l, register destination: store 16 zero bytes at the GETEY-adjusted address
+                SD(xZR, wback, fixedaddress + 8);
+            }
+            if (!MODREG) SMWRITE2(); // issued only when E is a memory operand
+            break;
+        case 0x50: // gathers the top bit of each 64-bit element of E into the low bits of gd
+            INST_NAME("VMOVMSKPD Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            XOR(gd, gd, gd); // start from gd = 0; bits are OR-ed in below
+            for (int i = 0; i < 2; ++i) {
+                // GD->dword[0] |= ((EX->q[i]>>63)&1)<<i;
+                LD(x2, wback, fixedaddress + 8 * i);
+                SRLI(x2, x2, 63); // keep only bit 63 of the element
+                if (i) SLLI(x2, x2, 1); // element 1 lands in bit 1; element 0 needs no shift
+                OR(gd, gd, x2);
+            }
+            if (vex.l) {
+                GETEY(); // NOTE(review): presumably retargets wback/fixedaddress to the upper 128 bits of E - confirm in the macro
+                for (int i = 0; i < 2; ++i) {
+                    LD(x2, wback, fixedaddress + 8 * i);
+                    SRLI(x2, x2, 63);
+                    SLLI(x2, x2, i + 2); // these two elements land in bits 2 and 3
+                    OR(gd, gd, x2);
+                }
+            }
+            break;
+        case 0x51:
+            // Square root of each 64-bit double of E, written to G (second 128 bits too when vex.l).
+            INST_NAME("VSQRTPD Gx, Ex");
+            nextop = F8;
+            GETGX();
+            GETGY(); // sets gyoffset, which the upper-half stores below depend on
+            GETEX(x2, 0, vex.l ? 24 : 8); // same bound as the sibling opcodes: the upper half is addressed too when vex.l
+            d0 = fpu_get_scratch(dyn);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                d1 = fpu_get_scratch(dyn);
+                FMVDX(d1, xZR); // d1 = 0.0, reference value for the sign tests
+            }
+            for (int i = 0; i < 2; ++i) {
+                FLD(d0, wback, fixedaddress + i * 8);
+                if (!BOX64ENV(dynarec_fastnan)) FLTD(x3, d0, d1); // x3 = (input < 0.0)
+                FSQRTD(d0, d0);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    BEQ(x3, xZR, 8); // skip the FNEGD unless the input was negative
+                    FNEGD(d0, d0); // presumably flips the NaN sign to match the x86 result - confirm
+                }
+                FSD(d0, gback, gdoffset + i * 8);
+            }
+            if (vex.l) {
+                GETEY();
+                // Same sequence for the second pair of doubles, stored at gback + gyoffset.
+                for (int i = 0; i < 2; ++i) {
+                    FLD(d0, wback, fixedaddress + i * 8);
+                    if (!BOX64ENV(dynarec_fastnan)) FLTD(x3, d0, d1);
+                    FSQRTD(d0, d0);
+                    if (!BOX64ENV(dynarec_fastnan)) {
+                        BEQ(x3, xZR, 8);
+                        FNEGD(d0, d0);
+                    }
+                    FSD(d0, gback, gyoffset + i * 8);
+                }
+            } else {
+                // Not vex.l: store 16 zero bytes at gback + gyoffset.
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x54: // bitwise AND of the V and E operands, result stored to the G operand
+            INST_NAME("VANDPD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            LD(x3, wback, fixedaddress + 0); // x3/x4 = 16 bytes of E
+            LD(x4, wback, fixedaddress + 8);
+            LD(x5, vback, vxoffset + 0); // x5/x6 = 16 bytes of V
+            LD(x6, vback, vxoffset + 8);
+            AND(x5, x5, x3);
+            AND(x6, x6, x4);
+            SD(x5, gback, gdoffset + 0); // result goes to gback + gdoffset
+            SD(x6, gback, gdoffset + 8);
+            if (vex.l) {
+                GETEY(); // NOTE(review): presumably retargets wback/fixedaddress to the upper 128 bits of E - confirm in the macro
+                LD(x3, wback, fixedaddress + 0);
+                LD(x4, wback, fixedaddress + 8);
+                LD(x5, vback, vyoffset + 0); // second half of V is read at vyoffset
+                LD(x6, vback, vyoffset + 8);
+                AND(x5, x5, x3);
+                AND(x6, x6, x4);
+                SD(x5, gback, gyoffset + 0); // second half of the result goes to gback + gyoffset
+                SD(x6, gback, gyoffset + 8);
+            } else {
+                SD(xZR, gback, gyoffset); // not vex.l: store 16 zero bytes at gback + gyoffset
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x55: // computes (NOT V) AND E and stores it to the G operand
+            INST_NAME("VANDNPD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            LD(x3, wback, fixedaddress + 0); // x3/x4 = 16 bytes of E
+            LD(x4, wback, fixedaddress + 8);
+            LD(x5, vback, vxoffset + 0); // x5/x6 = 16 bytes of V
+            LD(x6, vback, vxoffset + 8);
+            if (cpuext.zbb) {
+                ANDN(x5, x3, x5); // NOTE(review): presumably x3 & ~x5 in a single instruction, matching the fallback below - confirm operand order
+                ANDN(x6, x4, x6);
+            } else {
+                NOT(x5, x5); // fallback: invert V first ...
+                NOT(x6, x6);
+                AND(x5, x5, x3); // ... then AND with E
+                AND(x6, x6, x4);
+            }
+            SD(x5, gback, gdoffset + 0); // result goes to gback + gdoffset
+            SD(x6, gback, gdoffset + 8);
+            if (vex.l) {
+                GETEY(); // NOTE(review): presumably retargets wback/fixedaddress to the upper 128 bits of E - confirm in the macro
+                LD(x3, wback, fixedaddress + 0);
+                LD(x4, wback, fixedaddress + 8);
+                LD(x5, vback, vyoffset + 0); // second half of V is read at vyoffset
+                LD(x6, vback, vyoffset + 8);
+                if (cpuext.zbb) {
+                    ANDN(x5, x3, x5);
+                    ANDN(x6, x4, x6);
+                } else {
+                    NOT(x5, x5);
+                    NOT(x6, x6);
+                    AND(x5, x5, x3);
+                    AND(x6, x6, x4);
+                }
+                SD(x5, gback, gyoffset + 0); // second half of the result goes to gback + gyoffset
+                SD(x6, gback, gyoffset + 8);
+            } else {
+                SD(xZR, gback, gyoffset); // not vex.l: store 16 zero bytes at gback + gyoffset
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x56: // bitwise OR of the V and E operands, result stored to the G operand
+            INST_NAME("VORPD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            LD(x3, wback, fixedaddress + 0); // x3/x4 = 16 bytes of E
+            LD(x4, wback, fixedaddress + 8);
+            LD(x5, vback, vxoffset + 0); // x5/x6 = 16 bytes of V
+            LD(x6, vback, vxoffset + 8);
+            OR(x5, x5, x3);
+            OR(x6, x6, x4);
+            SD(x5, gback, gdoffset + 0); // result goes to gback + gdoffset
+            SD(x6, gback, gdoffset + 8);
+            if (vex.l) {
+                GETEY(); // NOTE(review): presumably retargets wback/fixedaddress to the upper 128 bits of E - confirm in the macro
+                LD(x3, wback, fixedaddress + 0);
+                LD(x4, wback, fixedaddress + 8);
+                LD(x5, vback, vyoffset + 0); // second half of V is read at vyoffset
+                LD(x6, vback, vyoffset + 8);
+                OR(x5, x5, x3);
+                OR(x6, x6, x4);
+                SD(x5, gback, gyoffset + 0); // second half of the result goes to gback + gyoffset
+                SD(x6, gback, gyoffset + 8);
+            } else {
+                SD(xZR, gback, gyoffset); // not vex.l: store 16 zero bytes at gback + gyoffset
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        case 0x57: // bitwise XOR of the V and E operands, result stored to the G operand
+            INST_NAME("VXORPD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            LD(x3, wback, fixedaddress + 0); // x3/x4 = 16 bytes of E
+            LD(x4, wback, fixedaddress + 8);
+            LD(x5, vback, vxoffset + 0); // x5/x6 = 16 bytes of V
+            LD(x6, vback, vxoffset + 8);
+            XOR(x5, x5, x3);
+            XOR(x6, x6, x4);
+            SD(x5, gback, gdoffset + 0); // result goes to gback + gdoffset
+            SD(x6, gback, gdoffset + 8);
+            if (vex.l) {
+                GETEY(); // NOTE(review): presumably retargets wback/fixedaddress to the upper 128 bits of E - confirm in the macro
+                LD(x3, wback, fixedaddress + 0);
+                LD(x4, wback, fixedaddress + 8);
+                LD(x5, vback, vyoffset + 0); // second half of V is read at vyoffset
+                LD(x6, vback, vyoffset + 8);
+                XOR(x5, x5, x3);
+                XOR(x6, x6, x4);
+                SD(x5, gback, gyoffset + 0); // second half of the result goes to gback + gyoffset
+                SD(x6, gback, gyoffset + 8);
+            } else {
+                SD(xZR, gback, gyoffset); // not vex.l: store 16 zero bytes at gback + gyoffset
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
         case 0x66:
             INST_NAME("VPCMPGTD Gx, Vx, Ex");
             nextop = F8;