about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2024-08-26 01:52:50 +0800
committer: GitHub <noreply@github.com> 2024-08-25 19:52:50 +0200
commit: 4988fb27dc115e89146a017d0dff2a33abbc25e1 (patch)
tree: 6b6995104979e923a19ed56a71d55e9bf92683fc /src
parent: db1f0825ce26c1c49f61c01072598c52bbe9d6bc (diff)
download: box64-4988fb27dc115e89146a017d0dff2a33abbc25e1.tar.gz
box64-4988fb27dc115e89146a017d0dff2a33abbc25e1.zip
[RV64_DYNAREC] Fixed more issues in the vector infrastructure (#1755)
* [RV64_DYNAREC] Fixed SEW transformation for vector

* more tweaks

* more fixes

* More fixes

* more fixes

* re-enable vector extension by default
Diffstat (limited to 'src')
-rw-r--r--  src/core.c  8
-rw-r--r--  src/dynarec/dynarec_arch.h  2
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f_vector.c  26
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c  56
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h  21
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass0.h  25
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass3.h  16
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_private.h  1
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h  4
9 files changed, 86 insertions, 73 deletions
diff --git a/src/core.c b/src/core.c
index 5e345568..03859f12 100644
--- a/src/core.c
+++ b/src/core.c
@@ -512,13 +512,7 @@ HWCAP2_AFP
     if(rv64_zbb) printf_log(LOG_INFO, " Zbb");
     if(rv64_zbc) printf_log(LOG_INFO, " Zbc");
     if(rv64_zbs) printf_log(LOG_INFO, " Zbs");
-    if (rv64_vector) {
-        char* p = getenv("BOX64_DYNAREC_RV64VEXT");
-        if (p != NULL && p[0] == '1')
-            printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen);
-        else
-            rv64_vector = 0;
-    }
+    if (rv64_vector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen);
     if(rv64_xtheadba) printf_log(LOG_INFO, " XTheadBa");
     if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb");
     if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs");
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 6a5c4977..c9de4b8f 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -44,7 +44,7 @@
 

 #define OTHER_CACHE()                                 \

     if (fpuCacheNeedsTransform(dyn, ninst)) ret |= 2; \

-    if (sewNeedsTransform(dyn, ninst)) ret |= 3;

+    if (sewNeedsTransform(dyn, ninst)) ret |= 4;

 

 #include "rv64/rv64_printer.h"

 #include "rv64/dynarec_rv64_private.h"

diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index df823f23..36b629a7 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -51,17 +51,17 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("MOVAPD Gx, Ex");
             nextop = F8;
             GETG;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
             if (MODREG) {
                 ed = (nextop & 7) + (rex.b << 3);
-                v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW8);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, dyn->vector_eew);
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 VMV_V_V(v0, v1);
             } else {
                 SMREAD();
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
-                VLE8_V(v0, ed, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                VLE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
         case 0x38: // SSSE3 opcodes
@@ -70,7 +70,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 case 0x00:
                     INST_NAME("PSHUFB Gx, Ex");
                     nextop = F8;
-                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                     GETGX_vector(q0, 1, VECTOR_SEW8);
                     GETEX_vector(q1, 0, 0, VECTOR_SEW8);
                     v0 = fpu_get_scratch(dyn);
@@ -87,21 +87,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 case 0x08 ... 0x0a:
                     if (nextop == 0x08) {
                         INST_NAME("PSIGNB Gx, Ex");
-                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                         i32 = 7;
                         nextop = F8;
                         GETGX_vector(q0, 1, VECTOR_SEW8);
                         GETEX_vector(q1, 0, 0, VECTOR_SEW8);
                     } else if (nextop == 0x09) {
                         INST_NAME("PSIGNW Gx, Ex");
-                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW16);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
                         i32 = 15;
                         nextop = F8;
                         GETGX_vector(q0, 1, VECTOR_SEW16);
                         GETEX_vector(q1, 0, 0, VECTOR_SEW16);
                     } else {
                         INST_NAME("PSIGND Gx, Ex");
-                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW32);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                         i32 = 31;
                         nextop = F8;
                         GETGX_vector(q0, 1, VECTOR_SEW32);
@@ -128,16 +128,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x6F:
             INST_NAME("MOVDQA Gx, Ex");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
             if (MODREG) {
-                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW8);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
                 GETGX_empty_vector(v0);
                 VMV_V_V(v0, v1);
             } else {
                 GETGX_empty_vector(v0);
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
-                VLE8_V(v0, ed, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                VLE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
         case 0x7E:
@@ -147,13 +147,13 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             nextop = F8;
             GETG;
             if (MODREG && gd == (nextop & 7) + (rex.b << 3)) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
                 // special case
                 q0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
             } else {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
-                q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW8);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+                q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew);
                 GETEX_vector(q1, 0, 0, VECTOR_SEW8);
                 VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 0ec15c43..6c86d94a 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -1732,9 +1732,9 @@ void sse_forget_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a)
     if (dyn->e.ssecache[a].vector == 0)
         return sse_forget_reg(dyn, ninst, s1, a);
     if (dyn->e.extcache[EXTIDX(dyn->e.ssecache[a].reg)].t == EXT_CACHE_XMMW) {
-        SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+        SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 1);
         ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
-        VSE8_V(dyn->e.ssecache[a].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+        VSE_V(dyn->e.ssecache[a].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
     }
     fpu_free_reg(dyn, dyn->e.ssecache[a].reg);
     dyn->e.olds[a].changed = 0;
@@ -1756,9 +1756,9 @@ void sse_purge07cache(dynarec_rv64_t* dyn, int ninst, int s1)
                 ++old;
             }
             if (dyn->e.ssecache[i].vector) {
-                SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
                 ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             } else if (dyn->e.ssecache[i].single)
                 FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
             else
@@ -1782,9 +1782,11 @@ static void sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1)
                 ++old;
             }
             if (dyn->e.ssecache[i].vector) {
-                SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
-                ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                if (dyn->e.ssecache[i].write) {
+                    SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
+                    ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
+                    VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                }
             } else if (dyn->e.ssecache[i].single)
                 FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
             else
@@ -1808,9 +1810,9 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1)
     for (int i = 0; i < 16; ++i)
         if (dyn->e.ssecache[i].v != -1) {
             if (dyn->e.ssecache[i].vector) {
-                SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
                 ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             } else if (dyn->e.ssecache[i].single)
                 FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
             else
@@ -1823,9 +1825,9 @@ void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
     if (dyn->e.ssecache[a].v == -1)
         return;
     if (dyn->e.ssecache[a].vector) {
-        SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+        SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
         ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
-        VSE8_V(dyn->e.ssecache[a].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+        VSE_V(dyn->e.ssecache[a].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
     } else if (dyn->e.ssecache[a].single)
         FSW(dyn->e.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a]));
     else
@@ -1847,9 +1849,9 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
         for (int i=start; i<8; ++i)
             if(dyn->e.ssecache[i].v!=-1) {
                 if (dyn->e.ssecache[i].vector) {
-                    SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+                    SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
                     ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                    VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                    VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
                 } else if (dyn->e.ssecache[i].single)
                     FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
                 else
@@ -1894,9 +1896,9 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
         for (int i=start; i<8; ++i)
             if(dyn->e.ssecache[i].v!=-1) {
                 if (dyn->e.ssecache[i].vector) {
-                    SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+                    SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
                     ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                    VLE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                    VLE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
                 } else if (dyn->e.ssecache[i].single)
                     FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
                 else
@@ -2077,9 +2079,9 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int
         case EXT_CACHE_XMMR:
         case EXT_CACHE_XMMW:
             MESSAGE(LOG_DUMP, "\t  - Loading %s\n", getCacheName(t, n));
-            SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+            SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n]));
-            VLE8_V(reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+            VLE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             break;
         case EXT_CACHE_SS:
             MESSAGE(LOG_DUMP, "\t  - Loading %s\n", getCacheName(t, n));
@@ -2137,9 +2139,9 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
             break;
         case EXT_CACHE_XMMW:
             MESSAGE(LOG_DUMP, "\t  - Unloading %s\n", getCacheName(t, n));
-            SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
+            SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n]));
-            VSE8_V(reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
+            VSE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             break;
         case EXT_CACHE_SS:
             MESSAGE(LOG_DUMP, "\t  - Unloading %s\n", getCacheName(t, n));
@@ -2292,6 +2294,15 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
                     FMVXD(s1, EXTREG(i));
                     FCVTDL(EXTREG(i), s1, RD_RTZ);
                     cache.extcache[i].t = EXT_CACHE_ST_D;
+                } else if (cache.extcache[i].t == EXT_CACHE_XMMR && cache_i2.extcache[i].t == EXT_CACHE_XMMW) {
+                    cache.extcache[i].t = EXT_CACHE_XMMW;
+                } else if (cache.extcache[i].t == EXT_CACHE_XMMW && cache_i2.extcache[i].t == EXT_CACHE_XMMR) {
+                    // refresh cache...
+                    MESSAGE(LOG_DUMP, "\t  - Refreh %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
+                    SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
+                    ADDI(s1, xEmu, offsetof(x64emu_t, xmm[cache.extcache[i].n]));
+                    VSE_V(EXTREG(i), s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                    cache.extcache[i].t = EXT_CACHE_XMMR;
                 }
             }
         }
@@ -2378,7 +2389,7 @@ static void sewTransform(dynarec_rv64_t* dyn, int ninst, int s1)
 }
 
 void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) {
-    if (cacheupd & 3)
+    if (cacheupd & 4)
         sewTransform(dyn, ninst, s1);
     if (cacheupd & 2)
         fpuCacheTransform(dyn, ninst, s1, s2, s3);
@@ -2532,9 +2543,9 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
 
 // Use vector extension as like SIMD for now, this function sets the specified element width,
 // other configs are set automatically.
-void vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew)
+int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew)
 {
-    if (sew == VECTOR_SEWNA) return;
+    if (sew == VECTOR_SEWNA) return VECTOR_SEW8;
     if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8;
     /* mu:   mask undisturbed
      * tu:   tail undisturbed
@@ -2545,4 +2556,5 @@ void vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew)
     uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | 0b000;
     ADDI(s1, xZR, 16 >> sew);
     VSETVLI(xZR, s1, vtypei);
+    return sew;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index acd9875d..db71985d 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1081,15 +1081,16 @@
 #define MODREG ((nextop & 0xC0) == 0xC0)
 
 #ifndef SET_ELEMENT_WIDTH
-#define SET_ELEMENT_WIDTH(s1, sew)                                            \
-    do {                                                                      \
-        if (sew == VECTOR_SEWNA) {                                            \
-        } else if (sew == VECTOR_SEWANY && dyn->vector_sew != VECTOR_SEWNA) { \
-        } else if (sew == dyn->vector_sew) {                                  \
-        } else {                                                              \
-            vector_vsetvl_emul1(dyn, ninst, s1, sew);                         \
-        }                                                                     \
-        dyn->vector_sew = sew;                                                \
+#define SET_ELEMENT_WIDTH(s1, sew, set)                                 \
+    do {                                                                \
+        if (sew == VECTOR_SEWANY && dyn->vector_sew != VECTOR_SEWNA) {  \
+            dyn->vector_eew = dyn->vector_sew;                          \
+        } else if (sew == dyn->vector_sew) {                            \
+            dyn->vector_eew = dyn->vector_sew;                          \
+        } else {                                                        \
+            dyn->vector_eew = vector_vsetvl_emul1(dyn, ninst, s1, sew); \
+        }                                                               \
+        if (set) dyn->vector_sew = dyn->vector_eew;                     \
     } while (0)
 #endif
 
@@ -1440,7 +1441,7 @@ void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
 void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);
 
-void vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew);
+int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew);
 
 #if STEP < 2
 #define CHECK_CACHE() 0
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index 90d383ee..782dae0b 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -56,21 +56,10 @@
         dynarec_log(LOG_NONE, "\n");    \
         }
 
-#define DEFAULT_VECTOR                                                                                       \
-    if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) {                      \
-        dynarec_log(LOG_NONE, "%p: Dynarec fallback to scalar version because of %s Opcode"                  \
-                              " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
-            (void*)ip, rex.is32bits ? "x86 " : "x64 ",                                                       \
-            PKip(0),                                                                                         \
-            PKip(1), PKip(2), PKip(3),                                                                       \
-            PKip(4), PKip(5), PKip(6),                                                                       \
-            PKip(7), PKip(8), PKip(9),                                                                       \
-            PKip(10), PKip(11), PKip(12),                                                                    \
-            PKip(13), PKip(14));                                                                             \
-        printFunctionAddr(ip, " => ");                                                                       \
-        dynarec_log(LOG_NONE, "\n");                                                                         \
-    }                                                                                                        \
-    return 0
-
-#define SET_ELEMENT_WIDTH(s1, sew) \
-    dyn->vector_sew = sew;
+#define SET_ELEMENT_WIDTH(s1, sew, set)                  \
+    do {                                                 \
+        if (sew != VECTOR_SEWANY && set)                 \
+            dyn->vector_sew = sew;                       \
+        else if (dyn->vector_sew == VECTOR_SEWNA && set) \
+            dyn->vector_sew = VECTOR_SEW8;               \
+    } while (0)
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index 1dce2bc4..5dc088a2 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -25,3 +25,19 @@
 
 #define TABLE64(A, V)   {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));}
 #define FTABLE64(A, V)  {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, "  FTable64: %g\n", v.d); AUIPC(x1, SPLIT20(val64offset)); FLD(A, x1, SPLIT12(val64offset));}
+
+#define DEFAULT_VECTOR                                                                                       \
+    if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) {                      \
+        dynarec_log(LOG_NONE, "%p: Dynarec fallback to scalar version because of %s Opcode"                  \
+                              " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
+            (void*)ip, rex.is32bits ? "x86 " : "x64 ",                                                       \
+            PKip(0),                                                                                         \
+            PKip(1), PKip(2), PKip(3),                                                                       \
+            PKip(4), PKip(5), PKip(6),                                                                       \
+            PKip(7), PKip(8), PKip(9),                                                                       \
+            PKip(10), PKip(11), PKip(12),                                                                    \
+            PKip(13), PKip(14));                                                                             \
+        printFunctionAddr(ip, " => ");                                                                       \
+        dynarec_log(LOG_NONE, "\n");                                                                         \
+    }                                                                                                        \
+    return 0
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index fa02ab9d..61737deb 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -154,6 +154,7 @@ typedef struct dynarec_rv64_s {
     uint8_t             always_test;
     uint8_t             abort;
     uint8_t             vector_sew;
+    uint8_t             vector_eew; // effective element width
 } dynarec_rv64_t;
 
 // convert idx (0..24) to reg index (10..31 0..1)
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 55384ed7..fa27dd8b 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1256,8 +1256,8 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define VSE32_V(vs3, rs1, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, 0b110, vs3, 0b0100111)) // ...000.00000.....110.....0100111
 #define VSE64_V(vs3, rs1, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, 0b111, vs3, 0b0100111)) // ...000.00000.....111.....0100111
 
-#define VLE_V(vd, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vd, 0b0000111))
-#define VSE_V(vd, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vs3, 0b0100111))
+#define VLE_V(vd, rs1, sew, vm, nf)  EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vd, 0b0000111))
+#define VSE_V(vs3, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vs3, 0b0100111))
 
 //  Vector Indexed-Unordered Instructions (including segment part)
 //  https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions