about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-08-26 20:25:44 +0800
committer GitHub <noreply@github.com> 2025-08-26 14:25:44 +0200
commit 485447850c24abc14e011333952ef9b46a8acaef (patch)
tree a77a19dced22c5da8c793a03be556553d862531b /src
parent 709a526217e6e495f8a5b8bb8566036f4f7ddf41 (diff)
download box64-485447850c24abc14e011333952ef9b46a8acaef.tar.gz
box64-485447850c24abc14e011333952ef9b46a8acaef.zip
[RV64_DYNAREC] Added more scalar avx opcodes (#2974)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_avx_66_0f.c 601
1 files changed, 601 insertions, 0 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
index bea96f73..372e0faa 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
@@ -1202,6 +1202,198 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else
                 YMM0(gd);
             break;
+        case 0x7C:
+            // VHADDPD: for each 128-bit lane, Gx = { Vx[0] + Vx[1], Ex[0] + Ex[1] }.
+            // Both sums of a lane are computed before anything is stored, so Gx may
+            // alias Vx and/or Ex without a separate save of an overwritten element.
+            // d0 always holds the Vx sum and v0 the Ex sum; the register indices are
+            // never rebound, so the upper lane can reuse them safely.
+            INST_NAME("VHADDPD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            v0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            // Low lane, first element: d0 = Vx[0] + Vx[1].
+            FLD(d0, vback, vxoffset + 0);
+            FLD(d1, vback, vxoffset + 8);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                // x3 = 1 only when neither input is NaN.
+                FEQD(x3, d0, d0);
+                FEQD(x4, d1, d1);
+                AND(x3, x3, x4);
+            }
+            FADDD(d0, d0, d1);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                // Negate only a NaN that was produced from two non-NaN inputs
+                // (presumably to match the sign of the NaN x86 generates).
+                FEQD(x4, d0, d0);
+                BEQZ(x3, 12);
+                BNEZ(x4, 8);
+                FNEGD(d0, d0);
+            }
+            if (vex.v == ed) {
+                // Vx and Ex are the same register: both elements get the same sum.
+                FSD(d0, gback, gdoffset + 0);
+                FSD(d0, gback, gdoffset + 8);
+            } else {
+                // Low lane, second element: v0 = Ex[0] + Ex[1], read before Gx is written.
+                FLD(v0, wback, fixedaddress + 0);
+                FLD(d1, wback, fixedaddress + 8);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x3, v0, v0);
+                    FEQD(x4, d1, d1);
+                    AND(x3, x3, x4);
+                }
+                FADDD(v0, v0, d1);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x4, v0, v0);
+                    BEQZ(x3, 12);
+                    BNEZ(x4, 8);
+                    FNEGD(v0, v0);
+                }
+                FSD(d0, gback, gdoffset + 0);
+                FSD(v0, gback, gdoffset + 8);
+            }
+            if (vex.l) {
+                // Upper lane: same computation on the gy/vy/Ey halves.
+                GETEY();
+                FLD(d0, vback, vyoffset + 0);
+                FLD(d1, vback, vyoffset + 8);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x3, d0, d0);
+                    FEQD(x4, d1, d1);
+                    AND(x3, x3, x4);
+                }
+                FADDD(d0, d0, d1);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x4, d0, d0);
+                    BEQZ(x3, 12);
+                    BNEZ(x4, 8);
+                    FNEGD(d0, d0);
+                }
+                if (vex.v == ed) {
+                    FSD(d0, gback, gyoffset + 0);
+                    FSD(d0, gback, gyoffset + 8);
+                } else {
+                    FLD(v0, wback, fixedaddress + 0);
+                    FLD(d1, wback, fixedaddress + 8);
+                    if (!BOX64ENV(dynarec_fastnan)) {
+                        FEQD(x3, v0, v0);
+                        FEQD(x4, d1, d1);
+                        AND(x3, x3, x4);
+                    }
+                    FADDD(v0, v0, d1);
+                    if (!BOX64ENV(dynarec_fastnan)) {
+                        FEQD(x4, v0, v0);
+                        BEQZ(x3, 12);
+                        BNEZ(x4, 8);
+                        FNEGD(v0, v0);
+                    }
+                    FSD(d0, gback, gyoffset + 0);
+                    FSD(v0, gback, gyoffset + 8);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0x7D:
+            // VHSUBPD: for each 128-bit lane, Gx = { Vx[0] - Vx[1], Ex[0] - Ex[1] }.
+            // Both differences of a lane are computed before anything is stored, so
+            // Gx may alias Vx and/or Ex without a separate save of an overwritten
+            // element. d0 always holds the Vx result and v0 the Ex result; the
+            // register indices are never rebound, so the upper lane can reuse them.
+            INST_NAME("VHSUBPD Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 24 : 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            v0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            // Low lane, first element: d0 = Vx[0] - Vx[1].
+            FLD(d0, vback, vxoffset + 0);
+            FLD(d1, vback, vxoffset + 8);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                // x3 = 1 only when neither input is NaN.
+                FEQD(x3, d0, d0);
+                FEQD(x4, d1, d1);
+                AND(x3, x3, x4);
+            }
+            FSUBD(d0, d0, d1);
+            if (!BOX64ENV(dynarec_fastnan)) {
+                // Negate only a NaN that was produced from two non-NaN inputs
+                // (presumably to match the sign of the NaN x86 generates).
+                FEQD(x4, d0, d0);
+                BEQZ(x3, 12);
+                BNEZ(x4, 8);
+                FNEGD(d0, d0);
+            }
+            if (vex.v == ed) {
+                // Vx and Ex are the same register: both elements get the same result.
+                FSD(d0, gback, gdoffset + 0);
+                FSD(d0, gback, gdoffset + 8);
+            } else {
+                // Low lane, second element: v0 = Ex[0] - Ex[1], read before Gx is written.
+                FLD(v0, wback, fixedaddress + 0);
+                FLD(d1, wback, fixedaddress + 8);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x3, v0, v0);
+                    FEQD(x4, d1, d1);
+                    AND(x3, x3, x4);
+                }
+                FSUBD(v0, v0, d1);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x4, v0, v0);
+                    BEQZ(x3, 12);
+                    BNEZ(x4, 8);
+                    FNEGD(v0, v0);
+                }
+                FSD(d0, gback, gdoffset + 0);
+                FSD(v0, gback, gdoffset + 8);
+            }
+            if (vex.l) {
+                // Upper lane: same computation on the gy/vy/Ey halves.
+                GETEY();
+                FLD(d0, vback, vyoffset + 0);
+                FLD(d1, vback, vyoffset + 8);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x3, d0, d0);
+                    FEQD(x4, d1, d1);
+                    AND(x3, x3, x4);
+                }
+                FSUBD(d0, d0, d1);
+                if (!BOX64ENV(dynarec_fastnan)) {
+                    FEQD(x4, d0, d0);
+                    BEQZ(x3, 12);
+                    BNEZ(x4, 8);
+                    FNEGD(d0, d0);
+                }
+                if (vex.v == ed) {
+                    FSD(d0, gback, gyoffset + 0);
+                    FSD(d0, gback, gyoffset + 8);
+                } else {
+                    FLD(v0, wback, fixedaddress + 0);
+                    FLD(d1, wback, fixedaddress + 8);
+                    if (!BOX64ENV(dynarec_fastnan)) {
+                        FEQD(x3, v0, v0);
+                        FEQD(x4, d1, d1);
+                        AND(x3, x3, x4);
+                    }
+                    FSUBD(v0, v0, d1);
+                    if (!BOX64ENV(dynarec_fastnan)) {
+                        FEQD(x4, v0, v0);
+                        BEQZ(x3, 12);
+                        BNEZ(x4, 8);
+                        FNEGD(v0, v0);
+                    }
+                    FSD(d0, gback, gyoffset + 0);
+                    FSD(v0, gback, gyoffset + 8);
+                }
+            } else
+                YMM0(gd);
+            break;
         case 0x7E:
             INST_NAME("VMOVD Ed, Gx");
             nextop = F8;
@@ -1513,6 +1705,361 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else
                 YMM0(gd);
             break;
+        case 0xD4:
+            INST_NAME("VPADDQ Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 2; ++i) {
+                LD(x3, vback, vxoffset + 8 * i);
+                LD(x4, wback, fixedaddress + 8 * i);
+                ADD(x3, x3, x4);
+                SD(x3, gback, gdoffset + 8 * i);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 2; ++i) {
+                    LD(x3, vback, vyoffset + 8 * i);
+                    LD(x4, wback, fixedaddress + 8 * i);
+                    ADD(x3, x3, x4);
+                    SD(x3, gback, gyoffset + 8 * i);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xD5:
+            INST_NAME("VPMULLW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, vex.l ? 30 : 14);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 8; ++i) {
+                LH(x3, vback, vxoffset + 2 * i);
+                LH(x4, wback, fixedaddress + 2 * i);
+                MULW(x3, x3, x4);
+                SH(x3, gback, gdoffset + 2 * i);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    LH(x3, vback, vyoffset + 2 * i);
+                    LH(x4, wback, fixedaddress + 2 * i);
+                    MULW(x3, x3, x4);
+                    SH(x3, gback, gyoffset + 2 * i);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xD8:
+            // VPSUBUSB: byte-wise Vx[i] - Ex[i], with negative differences clamped to 0.
+            INST_NAME("VPSUBUSB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            // Low 128 bits: sixteen zero-extended bytes.
+            for (int b = 0; b < 16; ++b) {
+                LBU(x3, vback, vxoffset + b);
+                LBU(x4, wback, fixedaddress + b);
+                SUB(x3, x3, x4);
+                if (!cpuext.zbb) {
+                    // x4 = all ones when x3 >= 0, zero when x3 < 0; the AND clamps to 0.
+                    NOT(x4, x3);
+                    SRAI(x4, x4, 63);
+                    AND(x3, x3, x4);
+                } else {
+                    MAX(x3, x3, xZR);
+                }
+                SB(x3, gback, gdoffset + b);
+            }
+            if (vex.l) {
+                // vex.l set: repeat on the gy/vy/Ey halves.
+                GETEY();
+                for (int b = 0; b < 16; ++b) {
+                    LBU(x3, vback, vyoffset + b);
+                    LBU(x4, wback, fixedaddress + b);
+                    SUB(x3, x3, x4);
+                    if (!cpuext.zbb) {
+                        NOT(x4, x3);
+                        SRAI(x4, x4, 63);
+                        AND(x3, x3, x4);
+                    } else {
+                        MAX(x3, x3, xZR);
+                    }
+                    SB(x3, gback, gyoffset + b);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xD9:
+            INST_NAME("VPSUBUSW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 8; ++i) {
+                LHU(x3, vback, vxoffset + i * 2);
+                LHU(x4, wback, fixedaddress + i * 2);
+                SUB(x3, x3, x4);
+                if (cpuext.zbb) {
+                    MAX(x3, x3, xZR);
+                } else {
+                    NOT(x4, x3);
+                    SRAI(x4, x4, 63);
+                    AND(x3, x3, x4);
+                }
+                SH(x3, gback, gdoffset + i * 2);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    LHU(x3, vback, vyoffset + i * 2);
+                    LHU(x4, wback, fixedaddress + i * 2);
+                    SUB(x3, x3, x4);
+                    if (cpuext.zbb) {
+                        MAX(x3, x3, xZR);
+                    } else {
+                        NOT(x4, x3);
+                        SRAI(x4, x4, 63);
+                        AND(x3, x3, x4);
+                    }
+                    SH(x3, gback, gyoffset + i * 2);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xDA:
+            // VPMINUB: byte-wise unsigned minimum of Vx[i] and Ex[i].
+            INST_NAME("VPMINUB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            // Low 128 bits: sixteen zero-extended bytes.
+            for (int b = 0; b < 16; ++b) {
+                LBU(x3, vback, vxoffset + b);
+                LBU(x4, wback, fixedaddress + b);
+                if (!cpuext.zbb) {
+                    // Keep x3 when it is already the smaller one, else take x4.
+                    BLTU(x3, x4, 8);
+                    MV(x3, x4);
+                } else {
+                    MINU(x3, x3, x4);
+                }
+                SB(x3, gback, gdoffset + b);
+            }
+            if (vex.l) {
+                // vex.l set: repeat on the gy/vy/Ey halves.
+                GETEY();
+                for (int b = 0; b < 16; ++b) {
+                    LBU(x3, vback, vyoffset + b);
+                    LBU(x4, wback, fixedaddress + b);
+                    if (!cpuext.zbb) {
+                        BLTU(x3, x4, 8);
+                        MV(x3, x4);
+                    } else {
+                        MINU(x3, x3, x4);
+                    }
+                    SB(x3, gback, gyoffset + b);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xDB:
+            INST_NAME("VPAND Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 2; ++i) {
+                LD(x3, vback, vxoffset + 8 * i);
+                LD(x4, wback, fixedaddress + 8 * i);
+                AND(x3, x3, x4);
+                SD(x3, gback, gdoffset + 8 * i);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 2; ++i) {
+                    LD(x3, vback, vyoffset + 8 * i);
+                    LD(x4, wback, fixedaddress + 8 * i);
+                    AND(x3, x3, x4);
+                    SD(x3, gback, gyoffset + 8 * i);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xDC:
+            // VPADDUSB: byte-wise Vx[i] + Ex[i], with sums capped at 0xFF.
+            INST_NAME("VPADDUSB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            // x5 holds the saturation ceiling for both lanes.
+            ADDI(x5, xZR, 0xFF);
+            // Low 128 bits: sixteen zero-extended bytes.
+            for (int b = 0; b < 16; ++b) {
+                LBU(x3, vback, vxoffset + b);
+                LBU(x4, wback, fixedaddress + b);
+                ADD(x3, x3, x4);
+                if (!cpuext.zbb) {
+                    // Sum below the ceiling: keep it; otherwise load 0xFF.
+                    BLT(x3, x5, 8);
+                    ADDI(x3, xZR, 0xFF);
+                } else {
+                    MINU(x3, x3, x5);
+                }
+                SB(x3, gback, gdoffset + b);
+            }
+            if (vex.l) {
+                // vex.l set: repeat on the gy/vy/Ey halves.
+                GETEY();
+                for (int b = 0; b < 16; ++b) {
+                    LBU(x3, vback, vyoffset + b);
+                    LBU(x4, wback, fixedaddress + b);
+                    ADD(x3, x3, x4);
+                    if (!cpuext.zbb) {
+                        BLT(x3, x5, 8);
+                        ADDI(x3, xZR, 0xFF);
+                    } else {
+                        MINU(x3, x3, x5);
+                    }
+                    SB(x3, gback, gyoffset + b);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xDD:
+            INST_NAME("VPADDUSW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 30 : 14);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            MOV32w(x5, 65535);
+            for (int i = 0; i < 8; ++i) {
+                LHU(x3, vback, vxoffset + i * 2);
+                LHU(x4, wback, fixedaddress + i * 2);
+                ADDW(x3, x3, x4);
+                if (cpuext.zbb) {
+                    MINU(x3, x3, x5);
+                } else {
+                    BGE(x5, x3, 8);
+                    MV(x3, x5);
+                }
+                SH(x3, gback, gdoffset + i * 2);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    LHU(x3, vback, vyoffset + i * 2);
+                    LHU(x4, wback, fixedaddress + i * 2);
+                    ADDW(x3, x3, x4);
+                    if (cpuext.zbb) {
+                        MINU(x3, x3, x5);
+                    } else {
+                        BGE(x5, x3, 8);
+                        MV(x3, x5);
+                    }
+                    SH(x3, gback, gyoffset + i * 2);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xDE:
+            // VPMAXUB: byte-wise unsigned maximum of Vx[i] and Ex[i].
+            INST_NAME("VPMAXUB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            // Low 128 bits: sixteen zero-extended bytes.
+            for (int b = 0; b < 16; ++b) {
+                LBU(x3, vback, vxoffset + b);
+                LBU(x4, wback, fixedaddress + b);
+                if (!cpuext.zbb) {
+                    // Keep x3 when it is already the larger one, else take x4.
+                    BLTU(x4, x3, 8);
+                    MV(x3, x4);
+                } else {
+                    MAXU(x3, x3, x4);
+                }
+                SB(x3, gback, gdoffset + b);
+            }
+            if (vex.l) {
+                // vex.l set: repeat on the gy/vy/Ey halves.
+                GETEY();
+                for (int b = 0; b < 16; ++b) {
+                    LBU(x3, vback, vyoffset + b);
+                    LBU(x4, wback, fixedaddress + b);
+                    if (!cpuext.zbb) {
+                        BLTU(x4, x3, 8);
+                        MV(x3, x4);
+                    } else {
+                        MAXU(x3, x3, x4);
+                    }
+                    SB(x3, gback, gyoffset + b);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xDF:
+            INST_NAME("VPANDN Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 2; ++i) {
+                LD(x3, vback, vxoffset + 8 * i);
+                LD(x4, wback, fixedaddress + 8 * i);
+                NOT(x3, x3);
+                AND(x3, x3, x4);
+                SD(x3, gback, gdoffset + 8 * i);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 2; ++i) {
+                    LD(x3, vback, vyoffset + 8 * i);
+                    LD(x4, wback, fixedaddress + 8 * i);
+                    NOT(x3, x3);
+                    AND(x3, x3, x4);
+                    SD(x3, gback, gyoffset + 8 * i);
+                }
+            } else
+                YMM0(gd);
+            break;
+        case 0xE0:
+            // VPAVGB: byte-wise rounded average, (Vx[i] + Ex[i] + 1) >> 1.
+            INST_NAME("VPAVGB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            // Low 128 bits: sixteen zero-extended bytes.
+            for (int b = 0; b < 16; ++b) {
+                LBU(x3, vback, vxoffset + b);
+                LBU(x4, wback, fixedaddress + b);
+                ADDW(x3, x3, x4);
+                ADDIW(x3, x3, 1); // rounding bias
+                SRAIW(x3, x3, 1);
+                SB(x3, gback, gdoffset + b);
+            }
+            if (vex.l) {
+                // vex.l set: repeat on the gy/vy/Ey halves.
+                GETEY();
+                for (int b = 0; b < 16; ++b) {
+                    LBU(x3, vback, vyoffset + b);
+                    LBU(x4, wback, fixedaddress + b);
+                    ADDW(x3, x3, x4);
+                    ADDIW(x3, x3, 1); // rounding bias
+                    SRAIW(x3, x3, 1);
+                    SB(x3, gback, gyoffset + b);
+                }
+            } else
+                YMM0(gd);
+            break;
         case 0xE1:
             INST_NAME("VPSRAW Gx, Vx, Ex");
             nextop = F8;
@@ -1565,6 +2112,35 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else
                 YMM0(gd);
             break;
+        case 0xE3:
+            INST_NAME("VPAVGW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 30 : 14);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 8; ++i) {
+                LHU(x3, vback, vxoffset + i * 2);
+                LHU(x4, wback, fixedaddress + i * 2);
+                ADDW(x3, x3, x4);
+                ADDIW(x3, x3, 1);
+                SRAIW(x3, x3, 1);
+                SH(x3, gback, gdoffset + i * 2);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    LHU(x3, vback, vyoffset + i * 2);
+                    LHU(x4, wback, fixedaddress + i * 2);
+                    ADDW(x3, x3, x4);
+                    ADDIW(x3, x3, 1);
+                    SRAIW(x3, x3, 1);
+                    SH(x3, gback, gyoffset + i * 2);
+                }
+            } else
+                YMM0(gd);
+            break;
         case 0xEF:
             INST_NAME("VPXOR Gx, Vx, Ex");
             nextop = F8;
@@ -1699,6 +2275,31 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else
                 YMM0(gd);
             break;
+        case 0xFB:
+            INST_NAME("VPSUBQ Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x2, 0, 8);
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 2; ++i) {
+                LD(x3, vback, vxoffset + 8 * i);
+                LD(x4, wback, fixedaddress + 8 * i);
+                SUB(x3, x3, x4);
+                SD(x3, gback, gdoffset + 8 * i);
+            }
+            if (vex.l) {
+                GETEY();
+                for (int i = 0; i < 2; ++i) {
+                    LD(x3, vback, vyoffset + 8 * i);
+                    LD(x4, wback, fixedaddress + 8 * i);
+                    SUB(x3, x3, x4);
+                    SD(x3, gback, gyoffset + 8 * i);
+                }
+            } else
+                YMM0(gd);
+            break;
         default:
             DEFAULT;
     }