about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-08-29 15:26:17 +0800
committer GitHub <noreply@github.com> 2025-08-29 09:26:17 +0200
commit d0ce4ef9138f7afcf844cd8f0f1b9258891642e0 (patch)
tree 777ecb88a76f94f83068eeb6e5c9f2627adbb9df /src
parent e863acf0a88958dec00e7d1ee3ce891aa5ddd6b6 (diff)
download box64-d0ce4ef9138f7afcf844cd8f0f1b9258891642e0.tar.gz
box64-d0ce4ef9138f7afcf844cd8f0f1b9258891642e0.zip
[RV64_DYNAREC] Added more scalar avx opcodes (#2978)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_avx_66_0f.c 178
2 files changed, 179 insertions, 1 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index a2ae0d0f..660ac6a8 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -1266,7 +1266,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             VAND_Vxy(v0, v0, d0);
             break;
         case 0xF4:
-            INST_NAME("VPMULLUDQ Gx, Ex");
+            INST_NAME("VPMULUDQ Gx, Vx, Ex");
             nextop = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMULWEVxy(D_WU, v0, v1, v2);
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
index f19a3f32..bcb56e91 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
@@ -2261,6 +2261,159 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else
                 YMM0(gd);
             break;
+        case 0xEA: // per 16-bit lane: Gx = signed min(Vx, Ex)
+            INST_NAME("VPMINSW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 30 : 14); // NOTE(review): 14 is the largest displacement used below; 30 presumably covers the upper half - confirm
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 8; ++i) { // 8 halfwords = low 128 bits
+                LH(x3, vback, vxoffset + 2 * i); // Vx lane: vback is the base paired with vxoffset in the sibling cases (was gback)
+                LH(x4, wback, fixedaddress + 2 * i); // Ex lane
+                if (cpuext.zbb) {
+                    MIN(x3, x3, x4); // single-instruction minimum when Zbb is reported
+                } else {
+                    BLT(x3, x4, 8); // x3 < x4: keep x3 by branching over the MV (offset 8 presumably in bytes - confirm)
+                    MV(x3, x4); // otherwise Ex's lane is the minimum
+                }
+                SH(x3, gback, gdoffset + 2 * i); // store the lane into Gx
+            }
+            if (vex.l) { // 256-bit form: same operation on the y offsets
+                GETEY(); // NOTE(review): presumably re-points wback/fixedaddress at Ex's upper half - confirm
+                for (int i = 0; i < 8; ++i) {
+                    LH(x3, vback, vyoffset + 2 * i); // Vy lane through vback, consistent with the low half (was gback)
+                    LH(x4, wback, fixedaddress + 2 * i);
+                    if (cpuext.zbb) {
+                        MIN(x3, x3, x4);
+                    } else {
+                        BLT(x3, x4, 8); // same branch-over-MV minimum as the low half
+                        MV(x3, x4);
+                    }
+                    SH(x3, gback, gyoffset + 2 * i);
+                }
+            } else
+                YMM0(gd); // NOTE(review): presumably clears the destination's upper half for the 128-bit form - confirm
+            break;
+        case 0xEB: // Gx = Vx | Ex, processed as 64-bit words
+            INST_NAME("VPOR Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 24 : 8); // NOTE(review): 8 is the largest displacement used below; 24 presumably covers the upper half - confirm
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 2; ++i) { // two 64-bit words = low 128 bits
+                LD(x3, vback, vxoffset + 8 * i); // Vx word
+                LD(x4, wback, fixedaddress + 8 * i); // Ex word
+                OR(x3, x3, x4);
+                SD(x3, gback, gdoffset + 8 * i); // result word into Gx
+            }
+            if (vex.l) { // 256-bit form: same operation on the y offsets
+                GETEY(); // NOTE(review): presumably re-points wback/fixedaddress at Ex's upper half - confirm
+                for (int i = 0; i < 2; ++i) {
+                    LD(x3, vback, vyoffset + 8 * i);
+                    LD(x4, wback, fixedaddress + 8 * i);
+                    OR(x3, x3, x4);
+                    SD(x3, gback, gyoffset + 8 * i);
+                }
+            } else
+                YMM0(gd); // NOTE(review): presumably clears the destination's upper half for the 128-bit form - confirm
+            break;
+        case 0xEC: // per byte lane: Gx = Vx + Ex, passed through SATw with bounds x6/x7
+            INST_NAME("VPADDSB Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 31 : 15); // NOTE(review): 15 is the largest displacement used below; 31 presumably covers the upper half - confirm
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            ADDIW(x6, xZR, 0xF80); // x6 = -128, first bound handed to SATw; loaded once for all lanes
+            ADDIW(x7, xZR, 0x80); // x7 = 128, second bound handed to SATw (NOTE(review): 128 rather than 127 - presumably exclusive; confirm)
+            for (int i = 0; i < 16; ++i) { // 16 bytes = low 128 bits
+                LB(x3, vback, vxoffset + i); // Vx byte
+                LB(x4, wback, fixedaddress + i); // Ex byte
+                ADD(x3, x3, x4);
+                SATw(x3, x6, x7); // NOTE(review): presumably clamps x3 using the x6/x7 bounds - confirm against the macro
+                SB(x3, gback, gdoffset + i); // result byte into Gx
+            }
+            if (vex.l) { // 256-bit form: same operation on the y offsets
+                GETEY(); // NOTE(review): presumably re-points wback/fixedaddress at Ex's upper half - confirm
+                for (int i = 0; i < 16; ++i) {
+                    LB(x3, vback, vyoffset + i);
+                    LB(x4, wback, fixedaddress + i);
+                    ADD(x3, x3, x4);
+                    SATw(x3, x6, x7);
+                    SB(x3, gback, gyoffset + i);
+                }
+            } else
+                YMM0(gd); // NOTE(review): presumably clears the destination's upper half for the 128-bit form - confirm
+            break;
+        case 0xED: // per 16-bit lane: Gx = Vx + Ex, passed through SATw with bounds x6/x7
+            INST_NAME("VPADDSW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 30 : 14); // NOTE(review): 14 is the largest displacement used below; 30 presumably covers the upper half - confirm
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            LUI(x6, 0xFFFF8); // x6 = -32768, first bound handed to SATw; loaded once for all lanes
+            LUI(x7, 0x8); // x7 = 32768, second bound handed to SATw (NOTE(review): 32768 rather than 32767 - presumably exclusive; confirm)
+            for (int i = 0; i < 8; ++i) { // 8 halfwords = low 128 bits
+                LH(x3, vback, vxoffset + i * 2); // Vx lane
+                LH(x4, wback, fixedaddress + i * 2); // Ex lane
+                ADD(x3, x3, x4);
+                SATw(x3, x6, x7); // NOTE(review): presumably clamps x3 using the x6/x7 bounds - confirm against the macro
+                SH(x3, gback, gdoffset + i * 2); // result lane into Gx
+            }
+            if (vex.l) { // 256-bit form: same operation on the y offsets
+                GETEY(); // NOTE(review): presumably re-points wback/fixedaddress at Ex's upper half - confirm
+                for (int i = 0; i < 8; ++i) {
+                    LH(x3, vback, vyoffset + i * 2);
+                    LH(x4, wback, fixedaddress + i * 2);
+                    ADD(x3, x3, x4);
+                    SATw(x3, x6, x7);
+                    SH(x3, gback, gyoffset + i * 2);
+                }
+            } else
+                YMM0(gd); // NOTE(review): presumably clears the destination's upper half for the 128-bit form - confirm
+            break;
+        case 0xEE: // per 16-bit lane: Gx = signed max(Vx, Ex)
+            INST_NAME("VPMAXSW Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 30 : 14); // NOTE(review): 14 is the largest displacement used below; 30 presumably covers the upper half - confirm
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 8; ++i) { // 8 halfwords = low 128 bits
+                LH(x3, vback, vxoffset + 2 * i); // Vx lane: vback is the base paired with vxoffset in the sibling cases (was gback)
+                LH(x4, wback, fixedaddress + 2 * i); // Ex lane
+                if (cpuext.zbb) {
+                    MAX(x3, x3, x4); // single-instruction maximum when Zbb is reported
+                } else {
+                    BLT(x4, x3, 8); // x4 < x3: keep x3 by branching over the MV (offset 8 presumably in bytes - confirm)
+                    MV(x3, x4); // otherwise Ex's lane is the maximum
+                }
+                SH(x3, gback, gdoffset + 2 * i); // store the lane into Gx
+            }
+            if (vex.l) { // 256-bit form: same operation on the y offsets
+                GETEY(); // NOTE(review): presumably re-points wback/fixedaddress at Ex's upper half - confirm
+                for (int i = 0; i < 8; ++i) {
+                    LH(x3, vback, vyoffset + 2 * i); // Vy lane through vback, consistent with the low half (was gback)
+                    LH(x4, wback, fixedaddress + 2 * i);
+                    if (cpuext.zbb) {
+                        MAX(x3, x3, x4);
+                    } else {
+                        BLT(x4, x3, 8); // same branch-over-MV maximum as the low half
+                        MV(x3, x4);
+                    }
+                    SH(x3, gback, gyoffset + 2 * i);
+                }
+            } else
+                YMM0(gd); // NOTE(review): presumably clears the destination's upper half for the 128-bit form - confirm
+            break;
         case 0xEF:
             INST_NAME("VPXOR Gx, Vx, Ex");
             nextop = F8;
@@ -2395,6 +2548,31 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             } else
                 YMM0(gd);
             break;
+        case 0xF4: // per 64-bit lane: Gx = (low 32 bits of Vx) * (low 32 bits of Ex), unsigned
+            INST_NAME("VPMULUDQ Gx, Vx, Ex");
+            nextop = F8;
+            GETEX(x1, 0, vex.l ? 24 : 8); // NOTE(review): 8 is the largest displacement used below; 24 presumably covers the upper half - confirm
+            GETGX();
+            GETGY();
+            GETVX();
+            GETVY();
+            for (int i = 0; i < 2; ++i) { // two 64-bit lanes = low 128 bits
+                LWU(x3, vback, vxoffset + i * 8); // low dword of the Vx lane: vback is the base paired with vxoffset in the sibling cases (was gback)
+                LWU(x4, wback, fixedaddress + i * 8); // low dword of the Ex lane
+                MUL(x3, x3, x4); // both inputs are 32-bit loads, so the product fits in 64 bits
+                SD(x3, gback, gdoffset + i * 8); // full 64-bit product into Gx
+            }
+            if (vex.l) { // 256-bit form: same operation on the y offsets
+                GETEY(); // NOTE(review): presumably re-points wback/fixedaddress at Ex's upper half - confirm
+                for (int i = 0; i < 2; ++i) {
+                    LWU(x3, vback, vyoffset + i * 8); // Vy lane through vback, consistent with the low half (was gback)
+                    LWU(x4, wback, fixedaddress + i * 8);
+                    MUL(x3, x3, x4);
+                    SD(x3, gback, gyoffset + i * 8);
+                }
+            } else
+                YMM0(gd); // NOTE(review): presumably clears the destination's upper half for the 128-bit form - confirm
+            break;
         case 0xFB:
             INST_NAME("VPSUBQ Gx, Vx, Ex");
             nextop = F8;