Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_functions.c | 112
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_functions.h |   9
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h    |   6
-rw-r--r--  src/dynarec/dynarec_arch.h                  |   2
-rw-r--r--  src/dynarec/dynarec_native.c                |  83
-rw-r--r--  src/dynarec/dynarec_native_functions.c      |  28
-rw-r--r--  src/dynarec/dynarec_native_functions.h      |   7
-rw-r--r--  src/dynarec/dynarec_native_pass.c           |   3
-rw-r--r--  src/dynarec/la64/dynarec_la64_private.h     |   5
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c   |  41
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_functions.c   |   6
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c      | 131
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h      |  13
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_private.h     |   9
14 files changed, 131 insertions(+), 324 deletions(-)
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 15c0f3f0..00c34afc 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -1148,6 +1148,7 @@ void updateNativeFlags(dynarec_native_t* dyn)
 void rasNativeState(dynarec_arm_t* dyn, int ninst)
 {
     dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].set_nat_flags = dyn->insts[ninst].use_nat_flags = dyn->insts[ninst].need_nat_flags = 0;
+    dyn->insts[ninst].ymm0_in = dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm0_add = dyn->insts[ninst].ymm0_out = dyn->insts[ninst].purge_ymm = 0;
 }
 
 int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst)
@@ -1276,4 +1277,113 @@ void propagateFpuBarrier(dynarec_arm_t* dyn)
             last_fpu_used = -1;  // reset the last_fpu_used...
         }
     }
-}
\ No newline at end of file
+}
+
+
+void updateYmm0s(dynarec_arm_t* dyn, int ninst, int max_ninst_reached)
+{
+    int can_incr = ninst == max_ninst_reached; // Are we the top-level call?
+    int ok = 1;
+    while ((can_incr || ok) && ninst < dyn->size) {
+        // if(dyn->need_dump) dynarec_log(LOG_NONE, "update ninst=%d (%d): can_incr=%d\n", ninst, max_ninst_reached, can_incr);
+        uint16_t new_purge_ymm, new_ymm0_in, new_ymm0_out;
+
+        if (dyn->insts[ninst].pred_sz && dyn->insts[ninst].x64.alive) {
+            // The union of the empty set is empty (0), the intersection is the universe (-1)
+            // The first instruction is the entry point, which has a virtual pred with ymm0_out = 0
+            // Similarly, float barriers reset ymm0s
+            uint16_t ymm0_union = 0;
+            uint16_t ymm0_inter = (ninst && !(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) ? ((uint16_t)-1) : (uint16_t)0;
+            for (int i = 0; i < dyn->insts[ninst].pred_sz; ++i) {
+                int pred = dyn->insts[ninst].pred[i];
+                // if(dyn->need_dump) dynarec_log(LOG_NONE, "\twith pred[%d] = %d", i, pred);
+                if (pred >= max_ninst_reached) {
+                    // if(dyn->need_dump) dynarec_log(LOG_NONE, " (skipped)\n");
+                    continue;
+                }
+
+                int pred_out = dyn->insts[pred].x64.has_callret ? 0 : dyn->insts[pred].ymm0_out;
+                // if(dyn->need_dump) dynarec_log(LOG_NONE, " ~> %04X\n", pred_out);
+                ymm0_union |= pred_out;
+                ymm0_inter &= pred_out;
+            }
+            // if(dyn->need_dump) dynarec_log(LOG_NONE, "\t=> %04X,%04X\n", ymm0_union, ymm0_inter);
+            //  Notice the default values yield something coherent here (if all pred are after ninst)
+            new_purge_ymm = ymm0_union & ~ymm0_inter;
+            new_ymm0_in = ymm0_inter;
+            new_ymm0_out = (ymm0_inter | dyn->insts[ninst].ymm0_add) & ~dyn->insts[ninst].ymm0_sub;
+
+            if ((dyn->insts[ninst].purge_ymm != new_purge_ymm) || (dyn->insts[ninst].ymm0_in != new_ymm0_in) || (dyn->insts[ninst].ymm0_out != new_ymm0_out)) {
+                // Need to update self and next(s)
+                dyn->insts[ninst].purge_ymm = new_purge_ymm;
+                dyn->insts[ninst].ymm0_in = new_ymm0_in;
+                dyn->insts[ninst].ymm0_out = new_ymm0_out;
+
+                if (can_incr) {
+                    // We always have ninst == max_ninst_reached when can_incr == 1
+                    ++max_ninst_reached;
+                } else {
+                    // We need to stop here if the opcode has no "real" next or if we reached the ninst of the toplevel
+                    ok = (max_ninst_reached - 1 != ninst) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].x64.has_callret;
+                }
+
+                int jmp = (dyn->insts[ninst].x64.jmp) ? dyn->insts[ninst].x64.jmp_insts : -1;
+                if ((jmp != -1) && (jmp < max_ninst_reached)) {
+                    // if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp);
+                    //  The jump goes before the last instruction reached, update the destination
+                    //  If this is the top level call, this means the jump goes backward (jmp != ninst)
+                    //  Otherwise, since we don't update all instructions, we may miss the update (don't use jmp < ninst)
+                    updateYmm0s(dyn, jmp, max_ninst_reached);
+                }
+            } else {
+                if (can_incr) {
+                    // We always have ninst == max_ninst_reached when can_incr == 1
+                    ++max_ninst_reached;
+
+                    // Also update jumps to before (they are skipped otherwise)
+                    int jmp = (dyn->insts[ninst].x64.jmp) ? dyn->insts[ninst].x64.jmp_insts : -1;
+                    if ((jmp != -1) && (jmp < max_ninst_reached)) {
+                        // if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp);
+                        updateYmm0s(dyn, jmp, max_ninst_reached);
+                    }
+                } else {
+                    // We didn't update anything, we can leave
+                    ok = 0;
+                }
+            }
+        } else if (can_incr) {
+            // We always have ninst == max_ninst_reached when can_incr == 1
+            ++max_ninst_reached;
+        } else {
+            // We didn't update anything, we can leave
+            ok = 0;
+        }
+        ++ninst;
+    }
+}
+
+
+// AVX helpers
+void avx_mark_zero(dynarec_arm_t* dyn, int ninst, int reg)
+{
+    dyn->ymm_zero |= (1<<reg);
+}
+
+int is_avx_zero(dynarec_arm_t* dyn, int ninst, int reg)
+{
+    return (dyn->ymm_zero>>reg)&1;
+}
+
+int is_avx_zero_unset(dynarec_arm_t* dyn, int ninst, int reg)
+{
+    if((dyn->ymm_zero>>reg)&1) {
+        dyn->ymm_zero &= ~(1<<reg);
+        return 1;
+    }
+    return 0;
+}
+
+void avx_mark_zero_reset(dynarec_arm_t* dyn, int ninst)
+{
+    dyn->ymm_zero = 0;
+}
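
Editor's note (not part of the patch): the core of updateYmm0s() above is a forward dataflow merge over each instruction's predecessors — registers whose upper half is known zero on every incoming path stay "known zero" (intersection), registers zeroed on only some paths must be purged (union minus intersection), and the opcode's own ymm0_add/ymm0_sub masks are applied last. A minimal standalone sketch of that merge, using hypothetical sample values and plain bitmasks instead of the real dynarec_arm_t fields:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical reduction of the per-instruction merge in updateYmm0s(). */
    int main(void)
    {
        uint16_t pred_out[] = { 0x0003, 0x0001 };      /* path A zeroed ymm0..1, path B only ymm0 */
        uint16_t ymm0_add = 0x0004, ymm0_sub = 0x0001; /* this opcode zeroes ymm2, un-zeroes ymm0 */

        uint16_t ymm0_union = 0;                       /* union of the empty set is 0 */
        uint16_t ymm0_inter = (uint16_t)-1;            /* intersection starts at the universe */
        for (int i = 0; i < 2; ++i) {
            ymm0_union |= pred_out[i];
            ymm0_inter &= pred_out[i];
        }

        uint16_t purge_ymm = ymm0_union & ~ymm0_inter;            /* zero on some paths only */
        uint16_t ymm0_in   = ymm0_inter;                          /* zero on all paths */
        uint16_t ymm0_out  = (ymm0_inter | ymm0_add) & ~ymm0_sub; /* state after this opcode */

        printf("purge=%04X in=%04X out=%04X\n", purge_ymm, ymm0_in, ymm0_out);
        /* prints purge=0002 in=0001 out=0004: ymm1 must be purged, ymm0 is known
         * zero on entry, and only ymm2 is known zero on exit. */
        return 0;
    }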
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index d32dbddd..a197eeec 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -90,4 +90,13 @@ int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st);
 void propagateFpuBarrier(dynarec_arm_t* dyn);
 // propage the uneeded flags on XMM/YMM regs (done between step 0 and step 1)
 void updateUneeded(dynarec_arm_t* dyn);
+
+void updateYmm0s(dynarec_arm_t* dyn, int ninst, int max_ninst_reached);
+
+// AVX helpers
+void avx_mark_zero(dynarec_arm_t* dyn, int ninst, int reg);
+int is_avx_zero(dynarec_arm_t* dyn, int ninst, int reg);
+int is_avx_zero_unset(dynarec_arm_t* dyn, int ninst, int reg);
+void avx_mark_zero_reset(dynarec_arm_t* dyn, int ninst);
+
 #endif //__DYNAREC_ARM_FUNCTIONS_H__
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 040031fc..8ba2407f 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -2002,6 +2002,10 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         }                                       \
     }
 
-#define PURGE_YMM()    avx_purge_ymm(dyn, ninst, dyn->insts[ninst+1].purge_ymm, x1)
+#define PURGE_YMM()                                                         \
+    do {                                                                    \
+        if ((ok > 0) && reset_n == -1 && dyn->insts[ninst + 1].purge_ymm)   \
+            avx_purge_ymm(dyn, ninst, dyn->insts[ninst + 1].purge_ymm, x1); \
+    } while (0)
 
 #endif //__DYNAREC_ARM64_HELPER_H__
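
Editor's note (an assumption about intent, not stated in the commit): folding the guard into PURGE_YMM() and wrapping it in do { ... } while (0) keeps the macro safe to use as a single statement, e.g. inside an unbraced if/else, where a bare `if (cond) call;` expansion would detach a following `else` or fail to compile. A generic illustration with stub names, nothing from box64:

    #include <stdio.h>

    /* Hypothetical stand-in: a bare
     *   #define PURGE_BAD(cond) if (cond) puts("purge")
     * followed by `;` and an `else` breaks (the empty statement detaches the else),
     * while the do/while(0) form always expands to exactly one statement. */
    #define PURGE_GOOD(cond)         \
        do {                         \
            if (cond) puts("purge"); \
        } while (0)

    int main(void)
    {
        int late = 0;
        if (late)
            PURGE_GOOD(1);  /* the else below still pairs with if (late) */
        else
            puts("skipped");
        return 0;
    }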
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 994eebc3..761f8166 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -24,7 +24,7 @@
 #define MAXBLOCK_SIZE ((1<<20)-200)
 
 #define RAZ_SPECIFIC(A, N)      rasNativeState(A, N)
-#define UPDATE_SPECIFICS(A)     updateNativeFlags(A); propagateFpuBarrier(A)
+#define UPDATE_SPECIFICS(A)     updateYmm0s(dyn, 0, 0); updateNativeFlags(A); propagateFpuBarrier(A)
 #define PREUPDATE_SPECIFICS(A)
 #define POSTUPDATE_SPECIFICS(A) updateUneeded(A)
 #define ARCH_SIZE(A)    get_size_arch(A)
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index c92f251f..17ad5a02 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -444,87 +444,6 @@ static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) {
     return ninst;
 }
 
-static void updateYmm0s(dynarec_native_t* dyn, int ninst, int max_ninst_reached) {
-    int can_incr = ninst == max_ninst_reached; // Are we the top-level call?
-    int ok = 1;
-    while ((can_incr || ok) && ninst<dyn->size) {
-        //if(dyn->need_dump) dynarec_log(LOG_NONE, "update ninst=%d (%d): can_incr=%d\n", ninst, max_ninst_reached, can_incr);
-        uint16_t new_purge_ymm, new_ymm0_in, new_ymm0_out;
-
-        if (dyn->insts[ninst].pred_sz && dyn->insts[ninst].x64.alive) {
-            // The union of the empty set is empty (0), the intersection is the universe (-1)
-            // The first instruction is the entry point, which has a virtual pred with ymm0_out = 0
-            // Similarly, float barriers reset ymm0s
-            uint16_t ymm0_union = 0;
-            uint16_t ymm0_inter = (ninst && !(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) ? ((uint16_t)-1) : (uint16_t)0;
-            for (int i = 0; i < dyn->insts[ninst].pred_sz; ++i) {
-                int pred = dyn->insts[ninst].pred[i];
-                //if(dyn->need_dump) dynarec_log(LOG_NONE, "\twith pred[%d] = %d", i, pred);
-                if (pred >= max_ninst_reached) {
-                    //if(dyn->need_dump) dynarec_log(LOG_NONE, " (skipped)\n");
-                    continue;
-                }
-
-                int pred_out = dyn->insts[pred].x64.has_callret ? 0 : dyn->insts[pred].ymm0_out;
-                //if(dyn->need_dump) dynarec_log(LOG_NONE, " ~> %04X\n", pred_out);
-                ymm0_union |= pred_out;
-                ymm0_inter &= pred_out;
-            }
-            //if(dyn->need_dump) dynarec_log(LOG_NONE, "\t=> %04X,%04X\n", ymm0_union, ymm0_inter);
-            // Notice the default values yield something coherent here (if all pred are after ninst)
-            new_purge_ymm = ymm0_union & ~ymm0_inter;
-            new_ymm0_in = ymm0_inter;
-            new_ymm0_out = (ymm0_inter | dyn->insts[ninst].ymm0_add) & ~dyn->insts[ninst].ymm0_sub;
-
-            if ((dyn->insts[ninst].purge_ymm != new_purge_ymm) || (dyn->insts[ninst].ymm0_in != new_ymm0_in) || (dyn->insts[ninst].ymm0_out != new_ymm0_out)) {
-                // Need to update self and next(s)
-                dyn->insts[ninst].purge_ymm = new_purge_ymm;
-                dyn->insts[ninst].ymm0_in = new_ymm0_in;
-                dyn->insts[ninst].ymm0_out = new_ymm0_out;
-
-                if (can_incr) {
-                    // We always have ninst == max_ninst_reached when can_incr == 1
-                    ++max_ninst_reached;
-                } else {
-                    // We need to stop here if the opcode has no "real" next or if we reached the ninst of the toplevel
-                    ok = (max_ninst_reached - 1 != ninst) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].x64.has_callret;
-                }
-
-                int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1;
-                if((jmp!=-1) && (jmp < max_ninst_reached)) {
-                    //if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp);
-                    // The jump goes before the last instruction reached, update the destination
-                    // If this is the top level call, this means the jump goes backward (jmp != ninst)
-                    // Otherwise, since we don't update all instructions, we may miss the update (don't use jmp < ninst)
-                    updateYmm0s(dyn, jmp, max_ninst_reached);
-                }
-            } else {
-                if (can_incr) {
-                    // We always have ninst == max_ninst_reached when can_incr == 1
-                    ++max_ninst_reached;
-
-                    // Also update jumps to before (they are skipped otherwise)
-                    int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1;
-                    if((jmp!=-1) && (jmp < max_ninst_reached)) {
-                        //if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp);
-                        updateYmm0s(dyn, jmp, max_ninst_reached);
-                    }
-                } else {
-                    // We didn't update anything, we can leave
-                    ok = 0;
-                }
-            }
-        } else if (can_incr) {
-            // We always have ninst == max_ninst_reached when can_incr == 1
-            ++max_ninst_reached;
-        } else {
-            // We didn't update anything, we can leave
-            ok = 0;
-        }
-        ++ninst;
-    }
-}
-
 void* current_helper = NULL;
 static int static_jmps[MAX_INSTS+2];
 static uintptr_t static_next[MAX_INSTS+2];
@@ -779,7 +698,6 @@ dynablock_t* FillBlock64(uintptr_t addr, int alternate, int is32bits, int inst_m
             int ii = i;
             while(ii<helper.size && !helper.insts[ii].pred_sz) {
                 fpu_reset_ninst(&helper, ii);
-                helper.insts[ii].ymm0_in = helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0;
                 RAZ_SPECIFIC(&helper, ii);
                 ++ii;
             }
@@ -796,7 +714,6 @@ dynablock_t* FillBlock64(uintptr_t addr, int alternate, int is32bits, int inst_m
         CancelBlock64(0);
         return CreateEmptyBlock(addr, is32bits, is_new);
     }
-    updateYmm0s(&helper, 0, 0);
     UPDATE_SPECIFICS(&helper);
     // check for still valid close loop
     for(int ii=0; ii<helper.jmp_sz && !helper.always_test; ++ii) {
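
Editor's note (a simplified, hypothetical sketch, not the real FillBlock64 body — the hook names are real, the stub bodies and the flat loop are assumptions): after this patch the generic pass only invokes the arch hooks, and the arm64 implementations take over the ymm0 bookkeeping, so rv64/la64, which dropped the ymm0_* fields entirely, need no generic-code changes:

    #include <stdio.h>

    typedef struct { int size; } helper_t;  /* stand-in for dynarec_native_t */

    /* Stub hooks mirroring the macros in dynarec_arch.h; on arm64 these now cover
     * the ymm0 reset (rasNativeState) and recomputation (updateYmm0s). */
    static void rasNativeState_stub(helper_t* h, int n) { (void)h; printf("reset inst %d (flags + ymm0_*)\n", n); }
    static void updateSpecifics_stub(helper_t* h) { (void)h; puts("updateYmm0s, updateNativeFlags, propagateFpuBarrier"); }

    #define RAZ_SPECIFIC(A, N)  rasNativeState_stub(A, N)
    #define UPDATE_SPECIFICS(A) updateSpecifics_stub(A)

    int main(void)
    {
        helper_t helper = { .size = 3 };
        for (int ninst = 0; ninst < helper.size; ++ninst)
            RAZ_SPECIFIC(&helper, ninst);   /* was: explicit ymm0_* = 0 in FillBlock64 */
        UPDATE_SPECIFICS(&helper);          /* was: explicit updateYmm0s(&helper, 0, 0) call */
        return 0;
    }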
diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c
index bb696ac2..79e558d6 100644
--- a/src/dynarec/dynarec_native_functions.c
+++ b/src/dynarec/dynarec_native_functions.c
@@ -620,34 +620,6 @@ uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nexto
 }
 #undef F8
 
-// AVX
-void avx_mark_zero(dynarec_native_t* dyn, int ninst, int reg)
-{
-    dyn->ymm_zero |= (1<<reg);
-}
-
-int is_avx_zero(dynarec_native_t* dyn, int ninst, int reg)
-{
-    return (dyn->ymm_zero>>reg)&1;
-}
-int is_avx_zero_unset(dynarec_native_t* dyn, int ninst, int reg)
-{
-    if((dyn->ymm_zero>>reg)&1) {
-        dyn->ymm_zero &= ~(1<<reg);
-        return 1;
-    }
-    return 0;
-}
-void avx_mark_zero_reset(dynarec_native_t* dyn, int ninst)
-{
-    dyn->ymm_zero = 0;
-}
-
-void avx_unmark_zero(dynarec_native_t* dyn, int ninst, int reg)
-{
-    dyn->ymm_zero &= ~(1<<reg);
-}
-
 void propagate_nodf(dynarec_native_t* dyn, int ninst)
 {
     while(ninst>=0) {
diff --git a/src/dynarec/dynarec_native_functions.h b/src/dynarec/dynarec_native_functions.h
index eca8568f..9601fc18 100644
--- a/src/dynarec/dynarec_native_functions.h
+++ b/src/dynarec/dynarec_native_functions.h
@@ -74,13 +74,6 @@ uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nexto
 // return Ib on a mod/rm opcode without emitting anything
 uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
 
-// AVX utilities
-void avx_mark_zero(dynarec_native_t* dyn, int ninst, int reg);
-int is_avx_zero(dynarec_native_t* dyn, int ninst, int reg);
-int is_avx_zero_unset(dynarec_native_t* dyn, int ninst, int reg);
-void avx_mark_zero_reset(dynarec_native_t* dyn, int ninst);
-void avx_unmark_zero(dynarec_native_t* dyn, int ninst, int reg);
-
 typedef struct register_mapping_s {
     const char* name;
     const char* native;
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index cb7da3fb..b88cfb41 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -346,8 +346,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
         }
         if((ok>0) && dyn->insts[ninst].x64.has_callret)
             reset_n = -2;
-        if((ok>0) && reset_n==-1 && dyn->insts[ninst+1].purge_ymm)
-            PURGE_YMM();
+        PURGE_YMM();
         ++ninst;
         #if STEP == 0
         memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t));
diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h
index a71557cc..f5cf5f29 100644
--- a/src/dynarec/la64/dynarec_la64_private.h
+++ b/src/dynarec/la64/dynarec_la64_private.h
@@ -100,11 +100,6 @@ typedef struct instruction_la64_s {
     int                 pass2choice;// value for choices that are fixed on pass2 for pass3
     uintptr_t           natcall;
     uint16_t            retn;
-    uint16_t            purge_ymm;  // need to purge some ymm
-    uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
-    uint16_t            ymm0_add;   // the ymm0 added by the opcode
-    uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
-    uint16_t            ymm0_out;   // the ymm0 at th end of the opcode
     uint16_t            ymm0_pass2, ymm0_pass3;
     uint8_t             barrier_maybe;
     uint8_t             will_write:2;    // [strongmem] will write to memory
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c
index 6bbc746d..b45f7abb 100644
--- a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c
@@ -46,47 +46,6 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
     rex_t rex = vex.rex;
 
     switch (opcode) {
-        case 0x10:
-            INST_NAME("VMOVSS Gx, [Vx,] Ex");
-            nextop = F8;
-            GETG;
-            if (MODREG) {
-                if (gd == vex.v) {
-                    v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
-                    q0 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1);
-                    FMVS(v0, q0);
-                } else {
-                    GETGX();
-                    GETVX();
-                    GETEX(x2, 0, 1);
-                    if (cpuext.xtheadmempair) {
-                        ADD(x1, vback, vxoffset);
-                        TH_LDD(x3, x4, x1, 0);
-                    } else {
-                        LD(x3, vback, vxoffset);
-                        LD(x4, vback, vxoffset + 8);
-                    }
-                    LWU(x5, wback, fixedaddress);
-                    if (cpuext.xtheadmempair) {
-                        ADDI(x1, gback, gdoffset);
-                        TH_SDD(x3, x4, x1, 0);
-                    } else {
-                        SD(x3, gback, gdoffset);
-                        SD(x4, gback, gdoffset + 8);
-                    }
-                    SW(x5, gback, gdoffset);
-                }
-            } else {
-                v0 = sse_get_reg_empty(dyn, ninst, x1, gd, 1);
-                SMREAD();
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
-                FLW(v0, ed, fixedaddress);
-                // reset upper part
-                SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 4);
-                SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 8);
-            }
-            YMM0(gd);
-            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index e86e0a0c..86f34710 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -521,8 +521,6 @@ void extcacheUnwind(extcache_t* cache)
                     break;
                 case EXT_CACHE_XMMR:
                 case EXT_CACHE_XMMW:
-                case EXT_CACHE_YMMR:
-                case EXT_CACHE_YMMW:
                     cache->ssecache[cache->extcache[i].n].reg = EXTREG(i);
                     cache->ssecache[cache->extcache[i].n].vector = 1;
                     cache->ssecache[cache->extcache[i].n].write = (cache->extcache[i].t == EXT_CACHE_XMMW) ? 1 : 0;
@@ -612,8 +610,6 @@ const char* getCacheName(int t, int n)
         case EXT_CACHE_SCR: sprintf(buff, "Scratch"); break;
         case EXT_CACHE_XMMW: sprintf(buff, "XMM%d", n); break;
         case EXT_CACHE_XMMR: sprintf(buff, "xmm%d", n); break;
-        case EXT_CACHE_YMMW: sprintf(buff, "YMM%d", n); break;
-        case EXT_CACHE_YMMR: sprintf(buff, "ymm%d", n); break;
         case EXT_CACHE_NONE: buff[0] = '\0'; break;
     }
     return buff;
@@ -734,8 +730,6 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
             case EXT_CACHE_SD: length += sprintf(buf + length, " f%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
             case EXT_CACHE_XMMR: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
             case EXT_CACHE_XMMW: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
-            case EXT_CACHE_YMMW: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
-            case EXT_CACHE_YMMR: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
             case EXT_CACHE_SCR: length += sprintf(buf + length, " f%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
             case EXT_CACHE_NONE:
             default: break;
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 13f87c85..17f02a17 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2061,25 +2061,6 @@ static void sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1)
             }
         }
 
-    // AVX
-    if (dyn->ymm_zero) {
-        if (old == -1) {
-            MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next ? "locally " : "");
-            ++old;
-        }
-        for (int i = 0; i < 16; ++i)
-            if (is_avx_zero(dyn, ninst, i)) {
-                if (cpuext.xtheadmempair) {
-                    ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i]));
-                    TH_SDD(xZR, xZR, s1, 0);
-                } else {
-                    SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]));
-                    SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8);
-                }
-            }
-        if (!next)
-            avx_mark_zero_reset(dyn, ninst);
-    }
     if (old != -1) {
         MESSAGE(LOG_DUMP, "\t------ Purge SSE Cache\n");
     }
@@ -2098,32 +2079,10 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1)
             else
                 FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
         }
-
-    // AVX
-    if (dyn->ymm_zero)
-        for (int i = 0; i < 16; ++i)
-            if (is_avx_zero(dyn, ninst, i)) {
-                if (cpuext.xtheadmempair) {
-                    ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i]));
-                    TH_SDD(xZR, xZR, s1, 0);
-                } else {
-                    SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]));
-                    SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8);
-                }
-            }
 }
 
 void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
 {
-    if (is_avx_zero(dyn, ninst, a)) {
-        if (cpuext.xtheadmempair) {
-            ADDI(s1, xEmu, offsetof(x64emu_t, ymm[a]));
-            TH_SDD(xZR, xZR, s1, 0);
-        } else {
-            SD(xZR, xEmu, offsetof(x64emu_t, ymm[a]));
-            SD(xZR, xEmu, offsetof(x64emu_t, ymm[a]) + 8);
-        }
-    }
     if (dyn->e.ssecache[a].v == -1)
         return;
     if (dyn->e.ssecache[a].vector) {
@@ -2136,14 +2095,6 @@ void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
         FSD(dyn->e.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a]));
 }
 
-void ymm_mark_zero(dynarec_rv64_t* dyn, int ninst, int a)
-{
-#if STEP == 0
-    dyn->insts[ninst].ymm0_add |= (1 << a);
-#endif
-    avx_mark_zero(dyn, ninst, a);
-}
-
 void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
 {
     // for float registers, we might lost f0..f7, f10..f17 and f28..f31, that means
@@ -2162,15 +2113,6 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
                     FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
                 else
                     FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
-                if (is_avx_zero(dyn, ninst, i)) {
-                    if (cpuext.xtheadmempair) {
-                        ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i]));
-                        TH_SDD(xZR, xZR, s1, 0);
-                    } else {
-                        SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]));
-                        SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8);
-                    }
-                }
             }
         MESSAGE(LOG_DUMP, "\t------- Push (float) XMM Cache (%d)\n", n);
     }
@@ -2210,15 +2152,6 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
                     ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
                     VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
                 }
-                if (is_avx_zero(dyn, ninst, i)) {
-                    if (cpuext.xtheadmempair) {
-                        ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i]));
-                        TH_SDD(xZR, xZR, s1, 0);
-                    } else {
-                        SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]));
-                        SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8);
-                    }
-                }
             }
         MESSAGE(LOG_DUMP, "\t------- Push (vector) XMM Cache (%d)\n", n);
     }
@@ -2330,10 +2263,6 @@ int fpu_needpurgecache(dynarec_rv64_t* dyn, int ninst)
     // sse
     for (int i = 0; i < 16; ++i)
         if (dyn->e.ssecache[i].v != -1) return 1;
-    // avx
-    if (dyn->ymm_zero)
-        for (int i = 0; i < 16; ++i)
-            if (is_avx_zero(dyn, ninst, i)) return 1;
     return 0;
 }
 
@@ -2372,13 +2301,6 @@ static int findCacheSlot(dynarec_rv64_t* dyn, int ninst, int t, int n, extcache_
                 case EXT_CACHE_XMMW:
                     if (t == EXT_CACHE_XMMR)
                         return i;
-                case EXT_CACHE_YMMR:
-                    if (t == EXT_CACHE_YMMW)
-                        return i;
-                    break;
-                case EXT_CACHE_YMMW:
-                    if (t == EXT_CACHE_YMMR)
-                        return i;
                     break;
             }
         }
@@ -2391,9 +2313,7 @@ static void swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t*
     if (i == j) return;
 
     if (cache->extcache[i].t == EXT_CACHE_XMMR || cache->extcache[i].t == EXT_CACHE_XMMW
-        || cache->extcache[j].t == EXT_CACHE_XMMR || cache->extcache[j].t == EXT_CACHE_XMMW
-        || cache->extcache[i].t == EXT_CACHE_YMMR || cache->extcache[i].t == EXT_CACHE_YMMW
-        || cache->extcache[j].t == EXT_CACHE_YMMR || cache->extcache[j].t == EXT_CACHE_YMMW) {
+        || cache->extcache[j].t == EXT_CACHE_XMMR || cache->extcache[j].t == EXT_CACHE_XMMW) {
         int reg_i = EXTREG(i);
         int reg_j = EXTREG(j);
         if (!cache->extcache[i].v) {
@@ -2451,7 +2371,7 @@ static void swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t*
 static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int s2, int s3, int* s1_val, int* s2_val, int* s3_top, extcache_t* cache, int i, int t, int n)
 {
     int reg = EXTREG(i);
-    if (cache->extcache[i].v && (cache->extcache[i].t == EXT_CACHE_XMMR || cache->extcache[i].t == EXT_CACHE_XMMW || cache->extcache[i].t == EXT_CACHE_YMMR || cache->extcache[i].t == EXT_CACHE_YMMW)) {
+    if (cache->extcache[i].v && (cache->extcache[i].t == EXT_CACHE_XMMR || cache->extcache[i].t == EXT_CACHE_XMMW)) {
         int j = i + 1;
         while (cache->extcache[j].v)
             ++j;
@@ -2483,13 +2403,6 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n]));
             VLE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             break;
-        case EXT_CACHE_YMMR:
-        case EXT_CACHE_YMMW:
-            MESSAGE(LOG_DUMP, "\t  - Loading %s\n", getCacheName(t, n));
-            SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
-            ADDI(s1, xEmu, offsetof(x64emu_t, ymm[n]));
-            VLE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
-            break;
         case EXT_CACHE_SS:
             MESSAGE(LOG_DUMP, "\t  - Loading %s\n", getCacheName(t, n));
             FLW(reg, xEmu, offsetof(x64emu_t, xmm[n]));
@@ -2552,7 +2465,6 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
     int reg = EXTREG(i);
     switch (t) {
         case EXT_CACHE_XMMR:
-        case EXT_CACHE_YMMR:
             MESSAGE(LOG_DUMP, "\t  - ignoring %s\n", getCacheName(t, n));
             break;
         case EXT_CACHE_XMMW:
@@ -2561,12 +2473,6 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n]));
             VSE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
             break;
-        case EXT_CACHE_YMMW:
-            MESSAGE(LOG_DUMP, "\t  - Unloading %s\n", getCacheName(t, n));
-            SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
-            ADDI(s1, xEmu, offsetof(x64emu_t, ymm[n]));
-            VSE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
-            break;
         case EXT_CACHE_SS:
             MESSAGE(LOG_DUMP, "\t  - Unloading %s\n", getCacheName(t, n));
             FSW(reg, xEmu, offsetof(x64emu_t, xmm[n]));
@@ -2733,8 +2639,6 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
                     cache.extcache[i].t = EXT_CACHE_ST_D;
                 } else if (cache.extcache[i].t == EXT_CACHE_XMMR && cache_i2.extcache[i].t == EXT_CACHE_XMMW) {
                     cache.extcache[i].t = EXT_CACHE_XMMW;
-                } else if (cache.extcache[i].t == EXT_CACHE_YMMR && cache_i2.extcache[i].t == EXT_CACHE_YMMW) {
-                    cache.extcache[i].t = EXT_CACHE_YMMW;
                 } else if (cache.extcache[i].t == EXT_CACHE_XMMW && cache_i2.extcache[i].t == EXT_CACHE_XMMR) {
                     // refresh cache...
                     MESSAGE(LOG_DUMP, "\t  - Refreh %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
@@ -2742,13 +2646,6 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
                     ADDI(s1, xEmu, offsetof(x64emu_t, xmm[cache.extcache[i].n]));
                     VSE_V(EXTREG(i), s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
                     cache.extcache[i].t = EXT_CACHE_XMMR;
-                } else if (cache.extcache[i].t == EXT_CACHE_YMMW && cache_i2.extcache[i].t == EXT_CACHE_YMMR) {
-                    // refresh cache...
-                    MESSAGE(LOG_DUMP, "\t  - Refreh %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
-                    SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0);
-                    ADDI(s1, xEmu, offsetof(x64emu_t, ymm[cache.extcache[i].n]));
-                    VSE_V(EXTREG(i), s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
-                    cache.extcache[i].t = EXT_CACHE_YMMR;
                 }
             }
         }
@@ -3256,27 +3153,3 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
     }
 #endif
 }
-
-
-void avx_purge_ymm(dynarec_rv64_t* dyn, int ninst, uint16_t mask, int s1)
-{
-    int do_something = 0;
-    for (int i = 0; i < 16; ++i)
-        if (mask & (1 << i)) {
-            if (is_avx_zero_unset(dyn, ninst, i)) {
-                if (!do_something) {
-                    MESSAGE(LOG_NONE, "Purge YMM mask=%04x --------\n", mask);
-                    do_something = 1;
-                }
-                if (cpuext.xtheadmempair) {
-                    ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i]));
-                    TH_SDD(xZR, xZR, s1, 0);
-                } else {
-                    SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]));
-                    SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8);
-                }
-            }
-        }
-    if (do_something)
-        MESSAGE(LOG_NONE, "---------- Purge YMM\n");
-}
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 0d3a976e..e665d420 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -397,8 +397,6 @@
 
 #define GB_EQ_EB() (MODREG && ((nextop & 0x38) >> 3) == (nextop & 7) && (rex.r == rex.b))
 
-#define YMM0(a) ymm_mark_zero(dyn, ninst, a);
-
 // Get direction with size Z and based of F_DF flag, on register r ready for load/store fetching
 // using s as scratch.
 #define GETDIR(r, s, Z)            \
@@ -1408,8 +1406,6 @@
 #define sse_purge07cache         STEPNAME(sse_purge07cache)
 #define sse_reflect_reg          STEPNAME(sse_reflect_reg)
 
-#define ymm_mark_zero STEPNAME(ymm_mark_zero)
-
 #define mmx_get_reg_vector       STEPNAME(mmx_get_reg_vector)
 #define mmx_get_reg_empty_vector STEPNAME(mmx_get_reg_empty_vector)
 #define sse_get_reg_empty_vector STEPNAME(sse_get_reg_empty_vector)
@@ -1429,7 +1425,6 @@
 #define fpu_unreflectcache  STEPNAME(fpu_unreflectcache)
 #define x87_reflectcount    STEPNAME(x87_reflectcount)
 #define x87_unreflectcount  STEPNAME(x87_unreflectcount)
-#define avx_purge_ymm       STEPNAME(avx_purge_ymm)
 
 #define CacheTransform STEPNAME(CacheTransform)
 #define rv64_move64    STEPNAME(rv64_move64)
@@ -1592,9 +1587,6 @@ void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1);
 // Set rounding according to mxcsr flags, return reg to restore flags
 int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2);
 
-// purge ymm_zero mask according to purge_ymm
-void avx_purge_ymm(dynarec_rv64_t* dyn, int ninst, uint16_t mask, int s1);
-
 void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);
 
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
@@ -1667,9 +1659,6 @@ void sse_purge07cache(dynarec_rv64_t* dyn, int ninst, int s1);
 // Push current value to the cache
 void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a);
 
-// mark an ymm upper part has zero (forgetting upper part if needed)
-void ymm_mark_zero(dynarec_rv64_t* dyn, int ninst, int a);
-
 // common coproc helpers
 // reset the cache with n
 void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n);
@@ -1931,7 +1920,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
 #define FCOMIS(v1, v2, s1, s2, s3, s4, s5) FCOMI(S, v1, v2, s1, s2, s3, s4, s5)
 #define FCOMID(v1, v2, s1, s2, s3, s4, s5) FCOMI(D, v1, v2, s1, s2, s3, s4, s5)
 
-#define PURGE_YMM() avx_purge_ymm(dyn, ninst, dyn->insts[ninst + 1].purge_ymm, x1)
+#define PURGE_YMM()
 
 // reg = (reg < -32768) ? -32768 : ((reg > 32767) ? 32767 : reg)
 #define SAT16(reg, s)             \
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index d50bafab..de7ba7be 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -21,9 +21,7 @@ typedef struct box64env_s box64env_t;
 #define EXT_CACHE_SCR    7
 #define EXT_CACHE_XMMW   8
 #define EXT_CACHE_XMMR   9
-#define EXT_CACHE_YMMW   10
-#define EXT_CACHE_YMMR   11
-#define EXT_CACHE_MMV    12
+#define EXT_CACHE_MMV    10
 
 #define EXT_CACHE_OLD_SD   0
 #define EXT_CACHE_OLD_SS   1
@@ -116,11 +114,6 @@ typedef struct instruction_rv64_s {
     int                 pass2choice;// value for choices that are fixed on pass2 for pass3
     uintptr_t           natcall;
     uint16_t            retn;
-    uint16_t            purge_ymm;  // need to purge some ymm
-    uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
-    uint16_t            ymm0_add;   // the ymm0 added by the opcode
-    uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
-    uint16_t            ymm0_out;   // the ymm0 at th end of the opcode
     uint16_t            ymm0_pass2, ymm0_pass3;
     int                 barrier_maybe;
     uint8_t             will_write:2;    // [strongmem] will write to memory