author    Yang Liu <liuyang22@iscas.ac.cn>    2025-08-21 19:19:56 +0800
committer GitHub <noreply@github.com>         2025-08-21 13:19:56 +0200
commit    392255594bde68507b6d6feac57f3014d8efcd1b (patch)
tree      812ecc8120c3e09f639217abf772eb494265df59 /src
parent    8d94d21716b3b3aadef4a9e6f92230bad8eccd84 (diff)
[RV64_DYNAREC] Added more scalar avx 66 0F3A opcodes (#2955)
Diffstat (limited to 'src')
 src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c | 184 ++++++++++++++++++++++++++++
 1 file changed, 184 insertions(+), 0 deletions(-)
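The hunk below adds two packed-integer opcodes to the RV64 dynarec's AVX 66 0F 3A decoder: 0x0E (VPBLENDW) and 0x0F (VPALIGNR). A short reference sketch of each opcode's per-lane semantics follows the corresponding case block.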
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
index d3a3b702..1f649e22 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
@@ -46,6 +46,190 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     rex_t rex = vex.rex;
 
     switch (opcode) {
+        case 0x0E:
+            INST_NAME("VPBLENDW Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGX();
+            GETEX(x2, 1, vex.l ? 30 : 14);
+            GETVX();
+            GETVY();
+            GETGY();
+            u8 = F8;
+            // For each 16-bit lane: imm8 bit i set -> word i from Ex, clear -> word i from Vx.
+            for (int i = 0; i < 8; ++i) {
+                if (u8 & (1 << i)) {
+                    if (gd != ed) {
+                        LHU(x3, wback, fixedaddress + 2 * i);
+                        SH(x3, gback, gdoffset + 2 * i);
+                    }
+                } else if (gd != vex.v) {
+                    LHU(x3, vback, vxoffset + 2 * i);
+                    SH(x3, gback, gdoffset + 2 * i);
+                }
+            }
+            if (vex.l) {
+                // 256-bit form: blend the upper 128-bit lane the same way.
+                GETEY();
+                for (int i = 0; i < 8; ++i) {
+                    if (u8 & (1 << i)) {
+                        if (gd != ed) {
+                            LHU(x3, wback, fixedaddress + 2 * i);
+                            SH(x3, gback, gyoffset + 2 * i);
+                        }
+                    } else if (gd != vex.v) {
+                        LHU(x3, vback, vyoffset + 2 * i);
+                        SH(x3, gback, gyoffset + 2 * i);
+                    }
+                }
+            } else {
+                // 128-bit form: VEX encoding zeroes the upper 128 bits of the destination.
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
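For reference, VPBLENDW selects each 16-bit word from Ex when the matching bit of imm8 is set and from Vx otherwise; the LHU/SH pairs emitted above skip a copy when the destination register already aliases the selected source. A minimal C sketch of one 128-bit lane (illustrative only; pblendw_128 is not a box64 function):

    #include <stdint.h>

    // One 128-bit lane of VPBLENDW: bit i of imm8 picks word i
    // from src2 (Ex), otherwise from src1 (Vx).
    static void pblendw_128(uint16_t dst[8], const uint16_t src1[8],
                            const uint16_t src2[8], uint8_t imm8)
    {
        for (int i = 0; i < 8; ++i)
            dst[i] = (imm8 & (1u << i)) ? src2[i] : src1[i];
    }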
+        case 0x0F:
+            INST_NAME("VPALIGNR Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGX();
+            GETEX(x2, 1, vex.l ? 24 : 8);
+            GETVX();
+            GETVY();
+            GETGY();
+            u8 = F8;
+            if (u8 > 31) { // all 32 bytes shifted out: result is zero
+                SD(xZR, gback, gdoffset + 0);
+                SD(xZR, gback, gdoffset + 8);
+            } else if (u8 > 23) { // only the high qword of Vx remains
+                LD(x5, vback, vxoffset + 8);
+                if (u8 > 24) {
+                    SRLI(x5, x5, 8 * (u8 - 24));
+                }
+                SD(x5, gback, gdoffset + 0);
+                SD(xZR, gback, gdoffset + 8);
+            } else if (u8 > 15) { // result comes entirely from Vx
+                if (u8 > 16) {
+                    LD(x5, vback, vxoffset + 8);
+                    LD(x4, vback, vxoffset + 0);
+                    SRLI(x3, x5, 8 * (u8 - 16)); // lower of higher 64 bits
+                    SLLI(x5, x5, 8 * (24 - u8)); // higher of lower 64 bits
+                    SD(x3, gback, gdoffset + 8);
+                    SRLI(x4, x4, 8 * (u8 - 16)); // lower of lower 64 bits
+                    OR(x4, x4, x5);              // lower 64 bits
+                    SD(x4, gback, gdoffset + 0);
+                } else if (gd != vex.v) {
+                    LD(x4, vback, vxoffset + 0);
+                    LD(x5, vback, vxoffset + 8);
+                    SD(x4, gback, gdoffset + 0);
+                    SD(x5, gback, gdoffset + 8);
+                }
+            } else if (u8 > 7) { // result spans the high qword of Ex and Vx
+                if (u8 > 8) {
+                    LD(x5, vback, vxoffset + 8);
+                    LD(x4, vback, vxoffset + 0);
+                    LD(x3, wback, fixedaddress + 8);
+                    SLLI(x5, x5, 8 * (16 - u8)); // higher of higher 64 bits
+                    SRLI(x1, x4, 8 * (u8 - 8));  // lower of higher 64 bits
+                    SLLI(x4, x4, 8 * (16 - u8)); // higher of lower 64 bits
+                    OR(x5, x1, x5);              // higher 64 bits
+                    SRLI(x3, x3, 8 * (u8 - 8));  // lower of lower 64 bits
+                    SD(x5, gback, gdoffset + 8);
+                    OR(x4, x4, x3); // lower 64 bits
+                    SD(x4, gback, gdoffset + 0);
+                } else {
+                    LD(x5, vback, vxoffset + 0);
+                    LD(x4, wback, fixedaddress + 8);
+                    SD(x5, gback, gdoffset + 8);
+                    SD(x4, gback, gdoffset + 0);
+                }
+            } else { // bytes from Ex, topped up from the low qword of Vx
+                if (u8 > 0) {
+                    LD(x5, vback, vxoffset + 0);
+                    LD(x4, wback, fixedaddress + 8);
+                    LD(x3, wback, fixedaddress + 0);
+                    SLLI(x5, x5, 8 * (8 - u8)); // higher of higher 64 bits
+                    SRLI(x1, x4, 8 * (u8 - 0)); // lower of higher 64 bits
+                    SLLI(x4, x4, 8 * (8 - u8)); // higher of lower 64 bits
+                    OR(x5, x1, x5);             // higher 64 bits
+                    SRLI(x3, x3, 8 * (u8 - 0)); // lower of lower 64 bits
+                    SD(x5, gback, gdoffset + 8);
+                    OR(x4, x4, x3); // lower 64 bits
+                    SD(x4, gback, gdoffset + 0);
+                } else {
+                    LD(x5, wback, fixedaddress + 8);
+                    LD(x4, wback, fixedaddress + 0);
+                    SD(x5, gback, gdoffset + 8);
+                    SD(x4, gback, gdoffset + 0);
+                }
+            }
+            if (vex.l) {
+                // 256-bit form: apply the same per-range alignment to the upper lane.
+                GETEY();
+                if (u8 > 31) {
+                    SD(xZR, gback, gyoffset + 0);
+                    SD(xZR, gback, gyoffset + 8);
+                } else if (u8 > 23) {
+                    LD(x5, vback, vyoffset + 8);
+                    if (u8 > 24) {
+                        SRLI(x5, x5, 8 * (u8 - 24));
+                    }
+                    SD(x5, gback, gyoffset + 0);
+                    SD(xZR, gback, gyoffset + 8);
+                } else if (u8 > 15) {
+                    if (u8 > 16) {
+                        LD(x5, vback, vyoffset + 8);
+                        LD(x4, vback, vyoffset + 0);
+                        SRLI(x3, x5, 8 * (u8 - 16)); // lower of higher 64 bits
+                        SLLI(x5, x5, 8 * (24 - u8)); // higher of lower 64 bits
+                        SD(x3, gback, gyoffset + 8);
+                        SRLI(x4, x4, 8 * (u8 - 16)); // lower of lower 64 bits
+                        OR(x4, x4, x5);              // lower 64 bits
+                        SD(x4, gback, gyoffset + 0);
+                    } else if (gd != vex.v) {
+                        LD(x4, vback, vyoffset + 0);
+                        LD(x5, vback, vyoffset + 8);
+                        SD(x4, gback, gyoffset + 0);
+                        SD(x5, gback, gyoffset + 8);
+                    }
+                } else if (u8 > 7) {
+                    if (u8 > 8) {
+                        LD(x5, vback, vyoffset + 8);
+                        LD(x4, vback, vyoffset + 0);
+                        LD(x3, wback, fixedaddress + 8);
+                        SLLI(x5, x5, 8 * (16 - u8)); // higher of higher 64 bits
+                        SRLI(x1, x4, 8 * (u8 - 8));  // lower of higher 64 bits
+                        SLLI(x4, x4, 8 * (16 - u8)); // higher of lower 64 bits
+                        OR(x5, x1, x5);              // higher 64 bits
+                        SRLI(x3, x3, 8 * (u8 - 8));  // lower of lower 64 bits
+                        SD(x5, gback, gyoffset + 8);
+                        OR(x4, x4, x3); // lower 64 bits
+                        SD(x4, gback, gyoffset + 0);
+                    } else {
+                        LD(x5, vback, vyoffset + 0);
+                        LD(x4, wback, fixedaddress + 8);
+                        SD(x5, gback, gyoffset + 8);
+                        SD(x4, gback, gyoffset + 0);
+                    }
+                } else {
+                    if (u8 > 0) {
+                        LD(x5, vback, vyoffset + 0);
+                        LD(x4, wback, fixedaddress + 8);
+                        LD(x3, wback, fixedaddress + 0);
+                        SLLI(x5, x5, 8 * (8 - u8)); // higher of higher 64 bits
+                        SRLI(x1, x4, 8 * (u8 - 0)); // lower of higher 64 bits
+                        SLLI(x4, x4, 8 * (8 - u8)); // higher of lower 64 bits
+                        OR(x5, x1, x5);             // higher 64 bits
+                        SRLI(x3, x3, 8 * (u8 - 0)); // lower of lower 64 bits
+                        SD(x5, gback, gyoffset + 8);
+                        OR(x4, x4, x3); // lower 64 bits
+                        SD(x4, gback, gyoffset + 0);
+                    } else {
+                        LD(x5, wback, fixedaddress + 8);
+                        LD(x4, wback, fixedaddress + 0);
+                        SD(x5, gback, gyoffset + 8);
+                        SD(x4, gback, gyoffset + 0);
+                    }
+                }
+            } else {
+                // 128-bit form: zero the upper 128 bits of the destination.
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
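For reference, VPALIGNR concatenates the two sources per 128-bit lane (Vx as the high 16 bytes, Ex as the low 16 bytes), shifts the 32-byte value right by imm8 bytes, and keeps the low 16 bytes; the branches above decompose this into 64-bit loads, shifts, and ORs selected by imm8 range. A minimal C sketch of one lane (illustrative only; palignr_128 is not a box64 function):

    #include <stdint.h>
    #include <string.h>

    // One 128-bit lane of VPALIGNR: dst = lowest 16 bytes of
    // (src1:src2) >> (imm8 * 8), with src1 (Vx) as the high half.
    static void palignr_128(uint8_t dst[16], const uint8_t src1[16],
                            const uint8_t src2[16], uint8_t imm8)
    {
        uint8_t concat[32];
        memcpy(concat, src2, 16);      // bytes 0..15  = Ex
        memcpy(concat + 16, src1, 16); // bytes 16..31 = Vx
        for (int i = 0; i < 16; ++i)
            dst[i] = (imm8 + i < 32) ? concat[imm8 + i] : 0;
    }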
         case 0x4A:
             INST_NAME("VBLENDVPS Gx, Vx, Ex, XMMImm8");
             nextop = F8;