about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2025-08-21 21:25:42 +0800
committer: GitHub <noreply@github.com> 2025-08-21 15:25:42 +0200
commit: 5327489f397e332e1702ef6c65e0de4a9f756bc7 (patch)
tree: 38342a6c24bf86733ddaf490444053e44b91f659 /src
parent: 392255594bde68507b6d6feac57f3014d8efcd1b (diff)
download: box64-5327489f397e332e1702ef6c65e0de4a9f756bc7.tar.gz
          box64-5327489f397e332e1702ef6c65e0de4a9f756bc7.zip
[RV64_DYNAREC] Fixed some scalar avx opcodes (#2956)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_avx_66_0f.c | 5
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 13
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c | 1
3 files changed, 12 insertions, 7 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
index 7a530d9e..e16e54b0 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c
@@ -102,6 +102,10 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETED(0);
             GETGX();
             GETGY();
+            if (MODREG && !rex.w) {
+                ZEXTW2(x3, ed);
+                ed = x3;
+            }
             SD(ed, gback, gdoffset);
             SD(xZR, gback, gdoffset + 8);
             SD(xZR, gback, gyoffset);
@@ -170,6 +174,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 LD(x3, gback, gyoffset + 8);
                 SD(x3, wback, fixedaddress + 8);
             } else if (MODREG) {
+                GETEY();
                 SD(xZR, wback, fixedaddress);
                 SD(xZR, wback, fixedaddress + 8);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
index dfbb7c14..28558f14 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c
@@ -54,7 +54,6 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETVX();
             GETGY();
             GETVY();
-
             if (gd == vex.v) {
                 ADDI(x5, xEmu, offsetof(x64emu_t, scratch));
                 LD(x3, vback, vxoffset + 0);
@@ -523,24 +522,24 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY();
             GETVY();
             for (int i = 0; i < 4; ++i) {
-                LH(x3, vback, vxoffset + i * 4);
-                LH(x4, wback, fixedaddress + i * 4);
+                LW(x3, vback, vxoffset + i * 4);
+                LW(x4, wback, fixedaddress + i * 4);
                 SLT(x1, xZR, x4);
                 SRAI(x5, x4, 63);
                 OR(x1, x1, x5);
                 MUL(x3, x1, x3);
-                SH(x3, gback, gdoffset + i * 4);
+                SW(x3, gback, gdoffset + i * 4);
             }
             if (vex.l) {
                 GETEY();
                 for (int i = 0; i < 4; ++i) {
-                    LH(x3, vback, vyoffset + i * 4);
-                    LH(x4, wback, fixedaddress + i * 4);
+                    LW(x3, vback, vyoffset + i * 4);
+                    LW(x4, wback, fixedaddress + i * 4);
                     SLT(x1, xZR, x4);
                     SRAI(x5, x4, 63);
                     OR(x1, x1, x5);
                     MUL(x3, x1, x3);
-                    SH(x3, gback, gyoffset + i * 4);
+                    SW(x3, gback, gyoffset + i * 4);
                 }
             } else {
                 SD(xZR, gback, gyoffset + 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
index 1f649e22..6b8cde4f 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c
@@ -238,6 +238,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGX();
             GETVX();
             GETGY();
+            GETVY();
             F8;
             for (int i = 0; i < 4; ++i) {
                 LW(x3, xEmu, offsetof(x64emu_t, xmm) + u8 * 16 + i * 4);