about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2024-11-09 18:58:41 +0800
committer: GitHub <noreply@github.com> 2024-11-09 11:58:41 +0100
commit: 7b2e77807dc46480986ddbbd053a5aa983e150d1 (patch)
tree: 26b74600057b32ee29c0a6cd999d61813e4721a9 /src
parent: 7a623ef19c3b032a015084b029705dd55e0af751 (diff)
download: box64-7b2e77807dc46480986ddbbd053a5aa983e150d1.tar.gz
          box64-7b2e77807dc46480986ddbbd053a5aa983e150d1.zip
[RV64_DYNAREC] Added mmx infra for vector (#2011)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 19
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.c | 35
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.h | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c | 151
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h | 19
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_private.h | 11
7 files changed, 185 insertions, 54 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index fd210e2a..0320a75b 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1366,8 +1366,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x6A:
             INST_NAME("PUNPCKHDQ Gm,Em");
             nextop = F8;
-            GETEM(x1, 0, 4);
             GETGM();
+            GETEM(x1, 0, 4);
             // GM->ud[0] = GM->ud[1];
             LWU(x3, gback, gdoffset + 1 * 4);
             SW(x3, gback, gdoffset + 0 * 4);
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 5e54bbdc..b20767cc 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -483,6 +483,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VADD_VX(q0, q1, xZR, VECTOR_MASKED);
             }
             break;
+        case 0x6F:
+            INST_NAME("MOVQ Gm, Em");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETG;
+            if (MODREG) {
+                v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, nextop & 7);
+                v0 = mmx_get_reg_empty_vector(dyn, ninst, x1, x2, x3, gd);
+                VMV_V_V(v0, v1);
+            } else {
+                v0 = mmx_get_reg_empty_vector(dyn, ninst, x1, x2, x3, gd);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v0, x4);
+            }
+            break;
         case 0xC2:
             INST_NAME("CMPPS Gx, Ex, Ib");
             nextop = F8;
@@ -567,11 +583,10 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x00 ... 0x0F:
         case 0x18:
         case 0x1F:
-        case 0x2C ... 0x2F:
         case 0x31:
         case 0x40 ... 0x4F:
-        case 0x60 ... 0x7F:
         case 0x80 ... 0xBF:
+        case 0xC0 ... 0xC1:
         case 0xC3 ... 0xC5:
         case 0xC7 ... 0xCF:
             return 0;
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index 234f3b6d..5e85e735 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -72,23 +72,26 @@ void fpu_free_reg(dynarec_rv64_t* dyn, int reg)
     if (dyn->e.extcache[idx].t != EXT_CACHE_ST_F && dyn->e.extcache[idx].t != EXT_CACHE_ST_D && dyn->e.extcache[idx].t != EXT_CACHE_ST_I64)
         dyn->e.extcache[idx].v = 0;
 }
-// Get an MMX double reg
-int fpu_get_reg_emm(dynarec_rv64_t* dyn, int emm)
+
+// Get an MMX reg
+int fpu_get_reg_emm(dynarec_rv64_t* dyn, int t, int emm)
 {
-    dyn->e.fpuused[EMM0 + emm] = 1;
-    dyn->e.extcache[EMM0 + emm].t = EXT_CACHE_MM;
-    dyn->e.extcache[EMM0 + emm].n = emm;
-    dyn->e.news |= (1<<(EMM0 + emm));
-    return EXTREG(EMM0 + emm);
+    int i = EMM0 + emm;
+    dyn->e.fpuused[i] = 1;
+    dyn->e.extcache[i].t = t;
+    dyn->e.extcache[i].n = emm;
+    dyn->e.news |= (1 << (i));
+    return EXTREG(i);
 }
+
 // Get an XMM reg
 int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm)
 {
-    int i = XMM0+xmm;
+    int i = XMM0 + xmm;
     dyn->e.fpuused[i] = 1;
     dyn->e.extcache[i].t = t;
     dyn->e.extcache[i].n = xmm;
-    dyn->e.news |= (1<<i);
+    dyn->e.news |= (1 << i);
     return EXTREG(i);
 }
 // Reset fpu regs counter
@@ -484,9 +487,9 @@ void extcacheUnwind(extcache_t* cache)
     cache->fpu_scratch = 0;
     cache->fpu_extra_qscratch = 0;
     cache->fpu_reg = 0;
-    for(int i=0; i<8; ++i) {
+    for (int i = 0; i < 8; ++i) {
         cache->x87cache[i] = -1;
-        cache->mmxcache[i] = -1;
+        cache->mmxcache[i].v = -1;
         cache->x87reg[i] = 0;
         cache->ssecache[i*2].v = -1;
         cache->ssecache[i*2+1].v = -1;
@@ -497,7 +500,9 @@ void extcacheUnwind(extcache_t* cache)
             cache->fpuused[i] = 1;
             switch (cache->extcache[i].t) {
                 case EXT_CACHE_MM:
-                    cache->mmxcache[cache->extcache[i].n] = EXTREG(i);
+                case EXT_CACHE_MMV:
+                    cache->mmxcache[cache->extcache[i].n].reg = EXTREG(i);
+                    cache->mmxcache[cache->extcache[i].n].vector = cache->extcache[i].t == EXT_CACHE_MMV;
                     ++cache->mmxcount;
                     ++cache->fpu_reg;
                     break;
@@ -602,6 +607,7 @@ const char* getCacheName(int t, int n)
         case EXT_CACHE_ST_F: sprintf(buff, "st%d", n); break;
         case EXT_CACHE_ST_I64: sprintf(buff, "STi%d", n); break;
         case EXT_CACHE_MM: sprintf(buff, "MM%d", n); break;
+        case EXT_CACHE_MMV: sprintf(buff, "MMV%d", n); break;
         case EXT_CACHE_SS: sprintf(buff, "SS%d", n); break;
         case EXT_CACHE_SD: sprintf(buff, "SD%d", n); break;
         case EXT_CACHE_SCR: sprintf(buff, "Scratch"); break;
@@ -664,6 +670,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
                 case EXT_CACHE_ST_F: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_ST_I64: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_MM: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
+                case EXT_CACHE_MMV: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_SS: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_SD: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_XMMR: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
@@ -725,8 +732,8 @@ static void x87_reset(extcache_t* e)
 static void mmx_reset(extcache_t* e)
 {
     e->mmxcount = 0;
-    for (int i=0; i<8; ++i)
-        e->mmxcache[i] = -1;
+    for (int i = 0; i < 8; ++i)
+        e->mmxcache[i].v = -1;
 }
 
 static void sse_reset(extcache_t* e)
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h
index 03a20925..fa618381 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.h
+++ b/src/dynarec/rv64/dynarec_rv64_functions.h
@@ -18,7 +18,7 @@ void fpu_reset_scratch(dynarec_rv64_t* dyn);
 // Get an x87 double reg
 int fpu_get_reg_x87(dynarec_rv64_t* dyn, int t, int n);
 // Get an MMX double reg
-int fpu_get_reg_emm(dynarec_rv64_t* dyn, int emm);
+int fpu_get_reg_emm(dynarec_rv64_t* dyn, int t, int emm);
 // Get an XMM quad reg
 int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm);
 // Free a FPU/MMX/XMM reg
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 538df9cf..5081a653 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -1566,38 +1566,108 @@ static int isx87Empty(dynarec_rv64_t* dyn)
 }
 
 // forget ext register for a MMX reg, does nothing if the regs is not loaded
-void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a)
+void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
 {
-    if (dyn->e.mmxcache[a] == -1)
+    if (dyn->e.mmxcache[a].v == -1)
         return;
-    FSD(dyn->e.mmxcache[a], xEmu, offsetof(x64emu_t, mmx[a]));
-    fpu_free_reg(dyn, dyn->e.mmxcache[a]);
+    if (dyn->e.mmxcache[a].vector) {
+        SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1);
+        VFMV_F_S(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg);
+    }
+    FSD(dyn->e.mmxcache[a].reg, xEmu, offsetof(x64emu_t, mmx[a]));
+    fpu_free_reg(dyn, dyn->e.mmxcache[a].reg);
+    dyn->e.mmxcache[a].v = -1;
     return;
 }
 
-// get neon register for a MMX reg, create the entry if needed
+static void mmx_transfer_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
+{
+    if (dyn->e.mmxcache[a].v == -1)
+        return;
+
+    SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1);
+    if (dyn->e.mmxcache[a].vector) {
+        VFMV_F_S(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg);
+    } else {
+        VFMV_S_F(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg);
+    }
+    dyn->e.mmxcache[a].vector = 1 - dyn->e.mmxcache[a].vector;
+    dyn->e.extcache[EXTIDX(dyn->e.mmxcache[a].reg)].t = dyn->e.mmxcache[a].vector ? EXT_CACHE_MMV : EXT_CACHE_MM;
+    return;
+}
+
+// get float register for a MMX reg, create the entry if needed
 int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a)
 {
     if(!dyn->e.x87stack && isx87Empty(dyn))
         x87_purgecache(dyn, ninst, 0, s1, s2, s3);
-    if(dyn->e.mmxcache[a]!=-1)
-        return dyn->e.mmxcache[a];
+    if (dyn->e.mmxcache[a].v != -1) {
+        if (dyn->e.mmxcache[a].vector) {
+            mmx_transfer_reg(dyn, ninst, s1, a);
+        }
+        return dyn->e.mmxcache[a].reg;
+    }
+
     ++dyn->e.mmxcount;
-    int ret = dyn->e.mmxcache[a] = fpu_get_reg_emm(dyn, a);
-    FLD(ret, xEmu, offsetof(x64emu_t, mmx[a]));
-    return ret;
+    dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MM, a);
+    dyn->e.mmxcache[a].vector = 0;
+    FLD(dyn->e.mmxcache[a].reg, xEmu, offsetof(x64emu_t, mmx[a]));
+    return dyn->e.mmxcache[a].reg;
 }
-// get neon register for a MMX reg, but don't try to synch it if it needed to be created
+
+// get vector register for a MMX reg, create the entry if needed
+int mmx_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a)
+{
+    if (!dyn->e.x87stack && isx87Empty(dyn))
+        x87_purgecache(dyn, ninst, 0, s1, s2, s3);
+    if (dyn->e.mmxcache[a].v != -1) {
+        if (!dyn->e.mmxcache[a].vector) {
+            mmx_transfer_reg(dyn, ninst, s1, a);
+        }
+        return dyn->e.mmxcache[a].reg;
+    }
+
+    ++dyn->e.mmxcount;
+    dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MMV, a);
+    dyn->e.mmxcache[a].vector = 1;
+    FLD(dyn->e.mmxcache[a].reg, xEmu, offsetof(x64emu_t, mmx[a]));
+    SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1);
+    VFMV_S_F(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg);
+    return dyn->e.mmxcache[a].reg;
+}
+
+// get float register for a MMX reg, but don't try to synch it if it needed to be created
 int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a)
 {
+    if (!dyn->e.x87stack && isx87Empty(dyn))
+        x87_purgecache(dyn, ninst, 0, s1, s2, s3);
+    if (dyn->e.mmxcache[a].v != -1) {
+        dyn->e.mmxcache[a].vector = 0;
+        dyn->e.extcache[EXTIDX(dyn->e.mmxcache[a].reg)].t = EXT_CACHE_MM;
+        return dyn->e.mmxcache[a].reg;
+    }
+
+    ++dyn->e.mmxcount;
+    dyn->e.mmxcache[a].vector = 0;
+    return dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MM, a);
+}
+
+// get vector register for a MMX reg, but don't try to synch it if it needed to be created
+int mmx_get_reg_empty_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a)
+{
     if(!dyn->e.x87stack && isx87Empty(dyn))
         x87_purgecache(dyn, ninst, 0, s1, s2, s3);
-    if(dyn->e.mmxcache[a]!=-1)
-        return dyn->e.mmxcache[a];
+    if (dyn->e.mmxcache[a].v != -1) {
+        dyn->e.mmxcache[a].vector = 1;
+        dyn->e.extcache[EXTIDX(dyn->e.mmxcache[a].reg)].t = EXT_CACHE_MMV;
+        return dyn->e.mmxcache[a].reg;
+    }
+
     ++dyn->e.mmxcount;
-    int ret = dyn->e.mmxcache[a] = fpu_get_reg_emm(dyn, a);
-    return ret;
+    dyn->e.mmxcache[a].vector = 1;
+    return dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MMV, a);
 }
+
 // purge the MMX cache only(needs 3 scratch registers)
 void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1)
 {
@@ -1606,29 +1676,39 @@ void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1)
     if(!next)
         dyn->e.mmxcount = 0;
     int old = -1;
-    for (int i=0; i<8; ++i)
-        if(dyn->e.mmxcache[i]!=-1) {
-            if (old==-1) {
-                MESSAGE(LOG_DUMP, "\tPurge %sMMX Cache ------\n", next?"locally ":"");
+    for (int i = 0; i < 8; ++i) {
+        if (dyn->e.mmxcache[i].v != -1) {
+            if (old == -1) {
+                MESSAGE(LOG_DUMP, "\tPurge %sMMX Cache ------\n", next ? "locally " : "");
                 ++old;
             }
-            FSD(dyn->e.mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i]));
-            if(!next) {
-                fpu_free_reg(dyn, dyn->e.mmxcache[i]);
-                dyn->e.mmxcache[i] = -1;
+            if (dyn->e.mmxcache[i].vector) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1);
+                VFMV_F_S(dyn->e.mmxcache[i].reg, dyn->e.mmxcache[i].reg);
+            }
+            FSD(dyn->e.mmxcache[i].reg, xEmu, offsetof(x64emu_t, mmx[i]));
+            if (!next) {
+                fpu_free_reg(dyn, dyn->e.mmxcache[i].reg);
+                dyn->e.mmxcache[i].v = -1;
             }
         }
-    if(old!=-1) {
+    }
+    if (old != -1) {
         MESSAGE(LOG_DUMP, "\t------ Purge MMX Cache\n");
     }
 }
 
 static void mmx_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1)
 {
-    for (int i=0; i<8; ++i)
-        if(dyn->e.mmxcache[i]!=-1) {
-            FLD(dyn->e.mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i]));
+    for (int i = 0; i < 8; ++i) {
+        if (dyn->e.mmxcache[i].v != -1) {
+            if (dyn->e.mmxcache[i].vector) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1);
+                VFMV_F_S(dyn->e.mmxcache[i].reg, dyn->e.mmxcache[i].reg);
+            }
+            FSD(dyn->e.mmxcache[i].reg, xEmu, offsetof(x64emu_t, mmx[i]));
         }
+    }
 }
 
 // SSE / SSE2 helpers
@@ -1671,7 +1751,7 @@ int sse_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single)
 {
     if (dyn->e.ssecache[a].v != -1) {
         if (dyn->e.ssecache[a].vector == 1) {
-            // it's in the fpu, forget it first...
+            // it's in the vpu, forget it first...
             sse_forget_reg_vector(dyn, ninst, s1, a);
             // update olds after the forget...
             dyn->e.olds[a].changed = 1;
@@ -1706,7 +1786,7 @@ int sse_get_reg_size_changed(dynarec_rv64_t* dyn, int ninst, int s1, int a, int
 {
     if (dyn->e.ssecache[a].v != -1) {
         if (dyn->e.ssecache[a].vector == 1) {
-            // it's in the fpu, forget it first...
+            // it's in the vpu, forget it first...
             sse_forget_reg_vector(dyn, ninst, s1, a);
             // update olds after the forget...
             dyn->e.olds[a].changed = 1;
@@ -2302,8 +2382,13 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int
             FLD(reg, xEmu, offsetof(x64emu_t, xmm[n]));
             break;
         case EXT_CACHE_MM:
+        case EXT_CACHE_MMV:
             MESSAGE(LOG_DUMP, "\t  - Loading %s\n", getCacheName(t, n));
             FLD(reg, xEmu, offsetof(x64emu_t, mmx[n]));
+            if (t == EXT_CACHE_MMV) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 0);
+                VFMV_S_F(reg, reg);
+            }
             break;
         case EXT_CACHE_ST_D:
         case EXT_CACHE_ST_F:
@@ -2369,7 +2454,12 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
             FSD(reg, xEmu, offsetof(x64emu_t, xmm[n]));
             break;
         case EXT_CACHE_MM:
+        case EXT_CACHE_MMV:
             MESSAGE(LOG_DUMP, "\t  - Unloading %s\n", getCacheName(t, n));
+            if (t == EXT_CACHE_MMV) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 0);
+                VFMV_F_S(reg, reg);
+            }
             FSD(reg, xEmu, offsetof(x64emu_t, mmx[n]));
             break;
         case EXT_CACHE_ST_D:
@@ -2463,6 +2553,9 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
         int j = findCacheSlot(dyn, ninst, EXT_CACHE_MM, i, &cache);
         if (j >= 0 && findCacheSlot(dyn, ninst, EXT_CACHE_MM, i, &cache_i2) == -1)
             unloadCache(dyn, ninst, stack_cnt, s1, s2, s3, &s1_val, &s2_val, &s3_top, &cache, j, cache.extcache[j].t, cache.extcache[j].n);
+        j = findCacheSlot(dyn, ninst, EXT_CACHE_MMV, i, &cache);
+        if (j >= 0 && findCacheSlot(dyn, ninst, EXT_CACHE_MMV, i, &cache_i2) == -1)
+            unloadCache(dyn, ninst, stack_cnt, s1, s2, s3, &s1_val, &s2_val, &s3_top, &cache, j, cache.extcache[j].t, cache.extcache[j].n);
     }
     for (int i = 0; i < 24; ++i) {
         if(cache.extcache[i].v)
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index d69addee..d4f4b102 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -524,17 +524,18 @@
         SMWRITE2();                                         \
     }
 
-#define GETGM()                     \
-    gd = ((nextop & 0x38) >> 3);    \
-    mmx_forget_reg(dyn, ninst, gd); \
-    gback = xEmu;                   \
+// Get GM, might use x1 as a scratch
+#define GETGM()                         \
+    gd = ((nextop & 0x38) >> 3);        \
+    mmx_forget_reg(dyn, ninst, x1, gd); \
+    gback = xEmu;                       \
     gdoffset = offsetof(x64emu_t, mmx[gd])
 
 // Get EM, might use x3
 #define GETEM(a, D, I12)                                                                         \
     if (MODREG) {                                                                                \
         ed = (nextop & 7);                                                                       \
-        mmx_forget_reg(dyn, ninst, ed);                                                          \
+        mmx_forget_reg(dyn, ninst, a, ed);                                                       \
         fixedaddress = offsetof(x64emu_t, mmx[ed]);                                              \
         wback = xEmu;                                                                            \
     } else {                                                                                     \
@@ -1292,6 +1293,8 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 
 #define ymm_mark_zero STEPNAME(ymm_mark_zero)
 
+#define mmx_get_reg_vector       STEPNAME(mmx_get_reg_vector)
+#define mmx_get_reg_empty_vector STEPNAME(mmx_get_reg_empty_vector)
 #define sse_get_reg_empty_vector STEPNAME(sse_get_reg_empty_vector)
 #define sse_get_reg_vector       STEPNAME(sse_get_reg_vector)
 #define sse_forget_reg_vector    STEPNAME(sse_forget_reg_vector)
@@ -1510,10 +1513,14 @@ int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b);
 // MMX helpers
 //  get float register for a MMX reg, create the entry if needed
 int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a);
+//  get vector register for a MMX reg, create the entry if needed
+int mmx_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a);
 // get float register for a MMX reg, but don't try to synch it if it needed to be created
 int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a);
+// get vector register for a MMX reg, but don't try to synch it if it needed to be created
+int mmx_get_reg_empty_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a);
 // forget float register for a MMX reg, create the entry if needed
-void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a);
+void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a);
 
 // SSE/SSE2 helpers
 //  get float register for a SSE reg, create the entry if needed
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index b591ecee..0beaf11c 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -22,6 +22,7 @@ typedef struct instsize_s instsize_t;
 #define EXT_CACHE_XMMR   9
 #define EXT_CACHE_YMMW   10
 #define EXT_CACHE_YMMR   11
+#define EXT_CACHE_MMV    12
 
 #define EXT_CACHE_OLD_SD   0
 #define EXT_CACHE_OLD_SS   1
@@ -36,6 +37,14 @@ typedef union ext_cache_s {
     };
 } ext_cache_t;
 
+typedef union mmx_cache_s {
+    int8_t v;
+    struct {
+        uint8_t reg : 7;
+        uint8_t vector : 1;
+    };
+} mmx_cache_t;
+
 typedef union sse_cache_s {
     int16_t v;
     struct {
@@ -75,7 +84,7 @@ typedef struct extcache_s {
     int8_t              x87cache[8];    // cache status for the 8 x87 register behind the fpu stack
     int8_t              x87reg[8];      // reg used for x87cache entry
     int16_t             tags;           // similar to fpu_tags
-    int8_t              mmxcache[8];    // cache status for the 8 MMX registers
+    mmx_cache_t         mmxcache[8];    // cache status for the 8 MMX registers
     sse_cache_t         ssecache[16];   // cache status for the 16 SSE(2) registers
     int8_t              fpuused[32];    // all double reg from fpu, used by x87, mmx, sse and avx
     int8_t              x87stack;       // cache stack counter