about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorYang Liu <liuyang22@iscas.ac.cn>2025-08-19 20:17:10 +0800
committerGitHub <noreply@github.com>2025-08-19 14:17:10 +0200
commit44448774a7be9ad7dbc97ff566bd1166974dbf75 (patch)
treee20d50bfbd713d44cb370324fa0d51d87fd98805
parenta280884f0986f9658547d2bdd4b515e0beb28259 (diff)
downloadbox64-44448774a7be9ad7dbc97ff566bd1166974dbf75.tar.gz
box64-44448774a7be9ad7dbc97ff566bd1166974dbf75.zip
[RV64_DYNAREC] Added more scalar avx opcodes (#2952)
-rw-r--r--CMakeLists.txt1
-rw-r--r--src/dynarec/rv64/dynarec_rv64_660f38.c30
-rw-r--r--src/dynarec/rv64/dynarec_rv64_avx.c2
-rw-r--r--src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c58
-rw-r--r--src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c87
-rw-r--r--src/dynarec/rv64/dynarec_rv64_helper.h2
6 files changed, 164 insertions, 16 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 527e913f..b46ac047 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1059,6 +1059,7 @@ if(RV64_DYNAREC)
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_0f.c"
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c"
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c"
+    "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c"
     "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c"
     )
 endif()
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c
index 4e33ca33..aca87f82 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f38.c
@@ -313,31 +313,29 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     nextop = F8;
                     SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
                     GETGX();
-                    GETEX(x2, 0, 8);
+                    GETEX(x1, 0, 8);
                     CLEAR_FLAGS();
                     SET_DFNONE();
                     IFX (X_ZF | X_CF) {
-                        LD(x5, wback, fixedaddress + 0);
-                        LD(x6, wback, fixedaddress + 8);
+                        LD(x2, wback, fixedaddress + 0);
+                        LD(x3, wback, fixedaddress + 8);
+                        LD(x4, gback, gdoffset + 0);
+                        LD(x5, gback, gdoffset + 8);
 
                         IFX (X_ZF) {
-                            LD(x3, gback, gdoffset + 0);
-                            LD(x4, gback, gdoffset + 8);
-                            AND(x3, x3, x5);
-                            AND(x4, x4, x6);
-                            OR(x3, x3, x4);
-                            BNEZ(x3, 8);
+                            AND(x6, x4, x2);
+                            AND(x7, x5, x3);
+                            OR(x6, x6, x7);
+                            BNEZ(x6, 4 + 4);
                             ORI(xFlags, xFlags, 1 << F_ZF);
                         }
                         IFX (X_CF) {
-                            LD(x3, gback, gdoffset + 0);
-                            NOT(x3, x3);
-                            LD(x4, gback, gdoffset + 8);
                             NOT(x4, x4);
-                            AND(x3, x3, x5);
-                            AND(x4, x4, x6);
-                            OR(x3, x3, x4);
-                            BNEZ(x3, 8);
+                            NOT(x5, x5);
+                            AND(x6, x4, x2);
+                            AND(x7, x5, x3);
+                            OR(x6, x6, x7);
+                            BNEZ(x6, 4 + 4);
                             ORI(xFlags, xFlags, 1 << F_CF);
                         }
                     }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx.c b/src/dynarec/rv64/dynarec_rv64_avx.c
index 6209ccff..12cce037 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx.c
@@ -56,6 +56,8 @@ uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int n
         addr = dynarec64_AVX_F3_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if ((vex.m == VEX_M_0F38) && (vex.p == VEX_P_66))
         addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if ((vex.m == VEX_M_0F3A) && (vex.p == VEX_P_66))
+        addr = dynarec64_AVX_66_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else {
         DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
index fc263e8f..bef35991 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
@@ -580,6 +580,64 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 SD(xZR, gback, gyoffset + 8);
             }
             break;
+        case 0x17:
+            // VPTEST only produces flags (no register is written):
+            //   ZF = 1 when (Ex AND Gx)     is all-zero
+            //   CF = 1 when (Ex AND NOT Gx) is all-zero
+            // over 128 bits, or over the full 256 bits when vex.l is set.
+            INST_NAME("VPTEST Gx, Ex");
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            GETEX(x1, 0, vex.l ? 24 : 8);
+            GETGX();
+            CLEAR_FLAGS();
+            SET_DFNONE();
+            IFX (X_ZF | X_CF) {
+                // Low 128 bits: x2:x3 = Ex, x4:x5 = Gx.
+                LD(x2, wback, fixedaddress + 0);
+                LD(x3, wback, fixedaddress + 8);
+                LD(x4, gback, gdoffset + 0);
+                LD(x5, gback, gdoffset + 8);
+
+                IFX (X_ZF) {
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    // Flags were cleared above: set ZF only when no bit survived.
+                    BNEZ(x6, 4 + 4);
+                    ORI(xFlags, xFlags, 1 << F_ZF);
+                }
+                IFX (X_CF) {
+                    // Gx is inverted in place; it is not needed again for this half.
+                    NOT(x4, x4);
+                    NOT(x5, x5);
+                    AND(x6, x4, x2);
+                    AND(x7, x5, x3);
+                    OR(x6, x6, x7);
+                    // Test the combined result (x6), not the raw Ex high qword (x3).
+                    BNEZ(x6, 4 + 4);
+                    ORI(xFlags, xFlags, 1 << F_CF);
+                }
+            }
+            if (vex.l) {
+                GETEY();
+                IFX (X_ZF | X_CF) {
+                    // Upper 128 bits: x2:x3 = Ey, x4:x5 = Gy.
+                    // NOTE(review): gyoffset is read here but no GETGY() is visible in this case;
+                    // confirm that GETGX() (or another macro above) initialises it.
+                    LD(x2, wback, fixedaddress + 0);
+                    LD(x3, wback, fixedaddress + 8);
+                    LD(x4, gback, gyoffset + 0);
+                    LD(x5, gback, gyoffset + 8);
+
+                    IFX (X_ZF) {
+                        AND(x6, x4, x2);
+                        AND(x7, x5, x3);
+                        OR(x6, x6, x7);
+                        // ZF currently reflects the low half only; a non-zero upper half must clear it.
+                        BEQZ(x6, 4 + 4);
+                        ANDI(xFlags, xFlags, ~(1 << F_ZF));
+                    }
+                    IFX (X_CF) {
+                        NOT(x4, x4);
+                        NOT(x5, x5);
+                        AND(x6, x4, x2);
+                        AND(x7, x5, x3);
+                        OR(x6, x6, x7);
+                        // Same for CF: a non-zero upper half must clear it.
+                        BEQZ(x6, 4 + 4);
+                        ANDI(xFlags, xFlags, ~(1 << F_CF));
+                    }
+                }
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
new file mode 100644
index 00000000..d3a3b702
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "box64cpu.h"
+#include "emu/x64emu_private.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "rv64_printer.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_functions.h"
+#include "../dynarec_helper.h"
+
+uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog) // RV64 dynarec handler for VEX opcodes dispatched with vex.m == 0F3A and vex.p == 66; returns the updated addr
+{ // only opcode 0x4A (VBLENDVPS) is handled here; anything else goes through DEFAULT
+    (void)ip; // not referenced directly in this body
+    (void)need_epilog; // not referenced directly in this body
+
+    uint8_t opcode = F8; // opcode byte that selects the case below (F8 presumably fetches one byte at addr and advances it)
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd; // NOTE(review): many locals below are never named in this body; the GET*/DEFAULT macros presumably reference them by name — confirm before removing any
+    uint8_t wback, wb1, wb2, gback, vback;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u, u64;
+    int64_t j64;
+    int64_t fixedaddress, gdoffset, vxoffset, gyoffset, vyoffset;
+    int unscaled;
+
+    rex_t rex = vex.rex; // REX bits carried by the VEX prefix; presumably read by the operand macros
+
+    switch (opcode) {
+        case 0x4A:
+            INST_NAME("VBLENDVPS Gx, Vx, Ex, XMMImm8");
+            nextop = F8;
+            u8 = geted_ib(dyn, addr, ninst, nextop) >> 4; // high nibble of the byte returned by geted_ib; used below as the index of the mask register (the "XMMImm8" operand)
+            GETEX(x1, 1, vex.l ? 28 : 12); // Ex operand, accessed below through wback/fixedaddress; NOTE(review): 12/28 look like the largest offset used below — confirm against GETEX
+            GETGX(); // destination, accessed below through gback/gdoffset
+            GETVX(); // first source, accessed below through vback/vxoffset
+            GETGY(); // presumably sets gyoffset for the destination's upper 128 bits — confirm
+            F8; // consumes one more byte; presumably the imm8 already read through geted_ib
+            for (int i = 0; i < 4; ++i) { // low 128 bits, one 32-bit lane per iteration
+                LW(x3, xEmu, offsetof(x64emu_t, xmm) + u8 * 16 + i * 4); // mask lane from xmm register u8
+                LWU(x4, wback, fixedaddress + i * 4); // Ex lane
+                LWU(x5, vback, vxoffset + i * 4); // Vx lane
+                BGE(x3, xZR, 4 + 4); // mask lane >= 0: presumably skips the next instruction, keeping the Vx lane
+                MV(x5, x4); // mask lane negative: take the Ex lane instead
+                MV(x3, x5); // selected lane -> x3
+                SW(x3, gback, gdoffset + i * 4); // store into the Gx lane
+            }
+            if (vex.l) { // 256-bit form: same blend on the upper half
+                GETEY(); // presumably re-targets wback/fixedaddress at the upper 128 bits of Ex — confirm
+                for (int i = 0; i < 4; ++i) {
+                    LW(x3, xEmu, offsetof(x64emu_t, ymm) + u8 * 16 + i * 4); // mask lane from ymm entry u8
+                    LWU(x4, wback, fixedaddress + i * 4);
+                    LWU(x5, vback, vyoffset + i * 4); // NOTE(review): vyoffset is read here but no GETVY()-style setup is visible in this case — confirm GETVX() initialises it
+                    BGE(x3, xZR, 4 + 4);
+                    MV(x5, x4);
+                    MV(x3, x5);
+                    SW(x3, gback, gyoffset + i * 4);
+                }
+            } else { // 128-bit form: zero the 16 bytes at gyoffset (presumably the destination's upper half)
+                SD(xZR, gback, gyoffset + 0);
+                SD(xZR, gback, gyoffset + 8);
+            }
+            break;
+        default:
+            DEFAULT; // opcode not handled in this file
+    }
+    return addr;
+}
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 8f01750d..b5ac7225 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1293,6 +1293,7 @@
 #define dynarec64_AVX_0F      STEPNAME(dynarec64_AVX_0F)
 #define dynarec64_AVX_66_0F   STEPNAME(dynarec64_AVX_66_0F)
 #define dynarec64_AVX_66_0F38 STEPNAME(dynarec64_AVX_66_0F38)
+#define dynarec64_AVX_66_0F3A STEPNAME(dynarec64_AVX_66_0F3A)
 #define dynarec64_AVX_F3_0F   STEPNAME(dynarec64_AVX_F3_0F)
 
 #define geted               STEPNAME(geted)
@@ -1745,6 +1746,7 @@ uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 
 #if STEP < 2