diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-11-09 18:58:41 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-09 11:58:41 +0100 |
| commit | 7b2e77807dc46480986ddbbd053a5aa983e150d1 (patch) | |
| tree | 26b74600057b32ee29c0a6cd999d61813e4721a9 /src | |
| parent | 7a623ef19c3b032a015084b029705dd55e0af751 (diff) | |
| download | box64-7b2e77807dc46480986ddbbd053a5aa983e150d1.tar.gz box64-7b2e77807dc46480986ddbbd053a5aa983e150d1.zip | |
[RV64_DYNAREC] Added mmx infra for vector (#2011)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 19 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 35 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 151 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 19 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 11 |
7 files changed, 185 insertions, 54 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index fd210e2a..0320a75b 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1366,8 +1366,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x6A: INST_NAME("PUNPCKHDQ Gm,Em"); nextop = F8; - GETEM(x1, 0, 4); GETGM(); + GETEM(x1, 0, 4); // GM->ud[0] = GM->ud[1]; LWU(x3, gback, gdoffset + 1 * 4); SW(x3, gback, gdoffset + 0 * 4); diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 5e54bbdc..b20767cc 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -483,6 +483,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VADD_VX(q0, q1, xZR, VECTOR_MASKED); } break; + case 0x6F: + INST_NAME("MOVQ Gm, Em"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETG; + if (MODREG) { + v1 = mmx_get_reg_vector(dyn, ninst, x1, x2, x3, nextop & 7); + v0 = mmx_get_reg_empty_vector(dyn, ninst, x1, x2, x3, gd); + VMV_V_V(v0, v1); + } else { + v0 = mmx_get_reg_empty_vector(dyn, ninst, x1, x2, x3, gd); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + LD(x4, ed, fixedaddress); + VMV_S_X(v0, x4); + } + break; case 0xC2: INST_NAME("CMPPS Gx, Ex, Ib"); nextop = F8; @@ -567,11 +583,10 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, case 0x00 ... 0x0F: case 0x18: case 0x1F: - case 0x2C ... 0x2F: case 0x31: case 0x40 ... 0x4F: - case 0x60 ... 0x7F: case 0x80 ... 0xBF: + case 0xC0 ... 0xC1: case 0xC3 ... 0xC5: case 0xC7 ... 0xCF: return 0; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index 234f3b6d..5e85e735 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -72,23 +72,26 @@ void fpu_free_reg(dynarec_rv64_t* dyn, int reg) if (dyn->e.extcache[idx].t != EXT_CACHE_ST_F && dyn->e.extcache[idx].t != EXT_CACHE_ST_D && dyn->e.extcache[idx].t != EXT_CACHE_ST_I64) dyn->e.extcache[idx].v = 0; } -// Get an MMX double reg -int fpu_get_reg_emm(dynarec_rv64_t* dyn, int emm) + +// Get an MMX reg +int fpu_get_reg_emm(dynarec_rv64_t* dyn, int t, int emm) { - dyn->e.fpuused[EMM0 + emm] = 1; - dyn->e.extcache[EMM0 + emm].t = EXT_CACHE_MM; - dyn->e.extcache[EMM0 + emm].n = emm; - dyn->e.news |= (1<<(EMM0 + emm)); - return EXTREG(EMM0 + emm); + int i = EMM0 + emm; + dyn->e.fpuused[i] = 1; + dyn->e.extcache[i].t = t; + dyn->e.extcache[i].n = emm; + dyn->e.news |= (1 << (i)); + return EXTREG(i); } + // Get an XMM reg int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm) { - int i = XMM0+xmm; + int i = XMM0 + xmm; dyn->e.fpuused[i] = 1; dyn->e.extcache[i].t = t; dyn->e.extcache[i].n = xmm; - dyn->e.news |= (1<<i); + dyn->e.news |= (1 << i); return EXTREG(i); } // Reset fpu regs counter @@ -484,9 +487,9 @@ void extcacheUnwind(extcache_t* cache) cache->fpu_scratch = 0; cache->fpu_extra_qscratch = 0; cache->fpu_reg = 0; - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { cache->x87cache[i] = -1; - cache->mmxcache[i] = -1; + cache->mmxcache[i].v = -1; cache->x87reg[i] = 0; cache->ssecache[i*2].v = -1; cache->ssecache[i*2+1].v = -1; @@ -497,7 +500,9 @@ void extcacheUnwind(extcache_t* cache) cache->fpuused[i] = 1; switch (cache->extcache[i].t) { case EXT_CACHE_MM: - cache->mmxcache[cache->extcache[i].n] = EXTREG(i); + case EXT_CACHE_MMV: + cache->mmxcache[cache->extcache[i].n].reg = EXTREG(i); + cache->mmxcache[cache->extcache[i].n].vector = cache->extcache[i].t == EXT_CACHE_MMV; ++cache->mmxcount; ++cache->fpu_reg; break; @@ -602,6 +607,7 @@ const char* getCacheName(int t, int n) case EXT_CACHE_ST_F: sprintf(buff, "st%d", n); break; case EXT_CACHE_ST_I64: sprintf(buff, "STi%d", n); break; case EXT_CACHE_MM: sprintf(buff, "MM%d", n); break; + case EXT_CACHE_MMV: sprintf(buff, "MMV%d", n); break; case EXT_CACHE_SS: sprintf(buff, "SS%d", n); break; case EXT_CACHE_SD: sprintf(buff, "SD%d", n); break; case EXT_CACHE_SCR: sprintf(buff, "Scratch"); break; @@ -664,6 +670,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r case EXT_CACHE_ST_F: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_ST_I64: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_MM: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; + case EXT_CACHE_MMV: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_SS: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_SD: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_XMMR: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; @@ -725,8 +732,8 @@ static void x87_reset(extcache_t* e) static void mmx_reset(extcache_t* e) { e->mmxcount = 0; - for (int i=0; i<8; ++i) - e->mmxcache[i] = -1; + for (int i = 0; i < 8; ++i) + e->mmxcache[i].v = -1; } static void sse_reset(extcache_t* e) diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index 03a20925..fa618381 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -18,7 +18,7 @@ void fpu_reset_scratch(dynarec_rv64_t* dyn); // Get an x87 double reg int fpu_get_reg_x87(dynarec_rv64_t* dyn, int t, int n); // Get an MMX double reg -int fpu_get_reg_emm(dynarec_rv64_t* dyn, int emm); +int fpu_get_reg_emm(dynarec_rv64_t* dyn, int t, int emm); // Get an XMM quad reg int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm); // Free a FPU/MMX/XMM reg diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 538df9cf..5081a653 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -1566,38 +1566,108 @@ static int isx87Empty(dynarec_rv64_t* dyn) } // forget ext register for a MMX reg, does nothing if the regs is not loaded -void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a) +void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) { - if (dyn->e.mmxcache[a] == -1) + if (dyn->e.mmxcache[a].v == -1) return; - FSD(dyn->e.mmxcache[a], xEmu, offsetof(x64emu_t, mmx[a])); - fpu_free_reg(dyn, dyn->e.mmxcache[a]); + if (dyn->e.mmxcache[a].vector) { + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1); + VFMV_F_S(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg); + } + FSD(dyn->e.mmxcache[a].reg, xEmu, offsetof(x64emu_t, mmx[a])); + fpu_free_reg(dyn, dyn->e.mmxcache[a].reg); + dyn->e.mmxcache[a].v = -1; return; } -// get neon register for a MMX reg, create the entry if needed +static void mmx_transfer_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) +{ + if (dyn->e.mmxcache[a].v == -1) + return; + + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1); + if (dyn->e.mmxcache[a].vector) { + VFMV_F_S(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg); + } else { + VFMV_S_F(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg); + } + dyn->e.mmxcache[a].vector = 1 - dyn->e.mmxcache[a].vector; + dyn->e.extcache[EXTIDX(dyn->e.mmxcache[a].reg)].t = dyn->e.mmxcache[a].vector ? EXT_CACHE_MMV : EXT_CACHE_MM; + return; +} + +// get float register for a MMX reg, create the entry if needed int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a) { if(!dyn->e.x87stack && isx87Empty(dyn)) x87_purgecache(dyn, ninst, 0, s1, s2, s3); - if(dyn->e.mmxcache[a]!=-1) - return dyn->e.mmxcache[a]; + if (dyn->e.mmxcache[a].v != -1) { + if (dyn->e.mmxcache[a].vector) { + mmx_transfer_reg(dyn, ninst, s1, a); + } + return dyn->e.mmxcache[a].reg; + } + ++dyn->e.mmxcount; - int ret = dyn->e.mmxcache[a] = fpu_get_reg_emm(dyn, a); - FLD(ret, xEmu, offsetof(x64emu_t, mmx[a])); - return ret; + dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MM, a); + dyn->e.mmxcache[a].vector = 0; + FLD(dyn->e.mmxcache[a].reg, xEmu, offsetof(x64emu_t, mmx[a])); + return dyn->e.mmxcache[a].reg; } -// get neon register for a MMX reg, but don't try to synch it if it needed to be created + +// get vector register for a MMX reg, create the entry if needed +int mmx_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a) +{ + if (!dyn->e.x87stack && isx87Empty(dyn)) + x87_purgecache(dyn, ninst, 0, s1, s2, s3); + if (dyn->e.mmxcache[a].v != -1) { + if (!dyn->e.mmxcache[a].vector) { + mmx_transfer_reg(dyn, ninst, s1, a); + } + return dyn->e.mmxcache[a].reg; + } + + ++dyn->e.mmxcount; + dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MMV, a); + dyn->e.mmxcache[a].vector = 1; + FLD(dyn->e.mmxcache[a].reg, xEmu, offsetof(x64emu_t, mmx[a])); + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1); + VFMV_S_F(dyn->e.mmxcache[a].reg, dyn->e.mmxcache[a].reg); + return dyn->e.mmxcache[a].reg; +} + +// get float register for a MMX reg, but don't try to synch it if it needed to be created int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a) { + if (!dyn->e.x87stack && isx87Empty(dyn)) + x87_purgecache(dyn, ninst, 0, s1, s2, s3); + if (dyn->e.mmxcache[a].v != -1) { + dyn->e.mmxcache[a].vector = 0; + dyn->e.extcache[EXTIDX(dyn->e.mmxcache[a].reg)].t = EXT_CACHE_MM; + return dyn->e.mmxcache[a].reg; + } + + ++dyn->e.mmxcount; + dyn->e.mmxcache[a].vector = 0; + return dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MM, a); +} + +// get vector register for a MMX reg, but don't try to synch it if it needed to be created +int mmx_get_reg_empty_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a) +{ if(!dyn->e.x87stack && isx87Empty(dyn)) x87_purgecache(dyn, ninst, 0, s1, s2, s3); - if(dyn->e.mmxcache[a]!=-1) - return dyn->e.mmxcache[a]; + if (dyn->e.mmxcache[a].v != -1) { + dyn->e.mmxcache[a].vector = 1; + dyn->e.extcache[EXTIDX(dyn->e.mmxcache[a].reg)].t = EXT_CACHE_MMV; + return dyn->e.mmxcache[a].reg; + } + ++dyn->e.mmxcount; - int ret = dyn->e.mmxcache[a] = fpu_get_reg_emm(dyn, a); - return ret; + dyn->e.mmxcache[a].vector = 1; + return dyn->e.mmxcache[a].reg = fpu_get_reg_emm(dyn, EXT_CACHE_MMV, a); } + // purge the MMX cache only(needs 3 scratch registers) void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) { @@ -1606,29 +1676,39 @@ void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) if(!next) dyn->e.mmxcount = 0; int old = -1; - for (int i=0; i<8; ++i) - if(dyn->e.mmxcache[i]!=-1) { - if (old==-1) { - MESSAGE(LOG_DUMP, "\tPurge %sMMX Cache ------\n", next?"locally ":""); + for (int i = 0; i < 8; ++i) { + if (dyn->e.mmxcache[i].v != -1) { + if (old == -1) { + MESSAGE(LOG_DUMP, "\tPurge %sMMX Cache ------\n", next ? "locally " : ""); ++old; } - FSD(dyn->e.mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); - if(!next) { - fpu_free_reg(dyn, dyn->e.mmxcache[i]); - dyn->e.mmxcache[i] = -1; + if (dyn->e.mmxcache[i].vector) { + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1); + VFMV_F_S(dyn->e.mmxcache[i].reg, dyn->e.mmxcache[i].reg); + } + FSD(dyn->e.mmxcache[i].reg, xEmu, offsetof(x64emu_t, mmx[i])); + if (!next) { + fpu_free_reg(dyn, dyn->e.mmxcache[i].reg); + dyn->e.mmxcache[i].v = -1; } } - if(old!=-1) { + } + if (old != -1) { MESSAGE(LOG_DUMP, "\t------ Purge MMX Cache\n"); } } static void mmx_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) { - for (int i=0; i<8; ++i) - if(dyn->e.mmxcache[i]!=-1) { - FLD(dyn->e.mmxcache[i], xEmu, offsetof(x64emu_t, mmx[i])); + for (int i = 0; i < 8; ++i) { + if (dyn->e.mmxcache[i].v != -1) { + if (dyn->e.mmxcache[i].vector) { + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 1); + VFMV_F_S(dyn->e.mmxcache[i].reg, dyn->e.mmxcache[i].reg); + } + FSD(dyn->e.mmxcache[i].reg, xEmu, offsetof(x64emu_t, mmx[i])); } + } } // SSE / SSE2 helpers @@ -1671,7 +1751,7 @@ int sse_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single) { if (dyn->e.ssecache[a].v != -1) { if (dyn->e.ssecache[a].vector == 1) { - // it's in the fpu, forget it first... + // it's in the vpu, forget it first... sse_forget_reg_vector(dyn, ninst, s1, a); // update olds after the forget... dyn->e.olds[a].changed = 1; @@ -1706,7 +1786,7 @@ int sse_get_reg_size_changed(dynarec_rv64_t* dyn, int ninst, int s1, int a, int { if (dyn->e.ssecache[a].v != -1) { if (dyn->e.ssecache[a].vector == 1) { - // it's in the fpu, forget it first... + // it's in the vpu, forget it first... sse_forget_reg_vector(dyn, ninst, s1, a); // update olds after the forget... dyn->e.olds[a].changed = 1; @@ -2302,8 +2382,13 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int FLD(reg, xEmu, offsetof(x64emu_t, xmm[n])); break; case EXT_CACHE_MM: + case EXT_CACHE_MMV: MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); FLD(reg, xEmu, offsetof(x64emu_t, mmx[n])); + if (t == EXT_CACHE_MMV) { + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 0); + VFMV_S_F(reg, reg); + } break; case EXT_CACHE_ST_D: case EXT_CACHE_ST_F: @@ -2369,7 +2454,12 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i FSD(reg, xEmu, offsetof(x64emu_t, xmm[n])); break; case EXT_CACHE_MM: + case EXT_CACHE_MMV: MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); + if (t == EXT_CACHE_MMV) { + SET_ELEMENT_WIDTH(s1, VECTOR_SEW64, 0); + VFMV_F_S(reg, reg); + } FSD(reg, xEmu, offsetof(x64emu_t, mmx[n])); break; case EXT_CACHE_ST_D: @@ -2463,6 +2553,9 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in int j = findCacheSlot(dyn, ninst, EXT_CACHE_MM, i, &cache); if (j >= 0 && findCacheSlot(dyn, ninst, EXT_CACHE_MM, i, &cache_i2) == -1) unloadCache(dyn, ninst, stack_cnt, s1, s2, s3, &s1_val, &s2_val, &s3_top, &cache, j, cache.extcache[j].t, cache.extcache[j].n); + j = findCacheSlot(dyn, ninst, EXT_CACHE_MMV, i, &cache); + if (j >= 0 && findCacheSlot(dyn, ninst, EXT_CACHE_MMV, i, &cache_i2) == -1) + unloadCache(dyn, ninst, stack_cnt, s1, s2, s3, &s1_val, &s2_val, &s3_top, &cache, j, cache.extcache[j].t, cache.extcache[j].n); } for (int i = 0; i < 24; ++i) { if(cache.extcache[i].v) diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index d69addee..d4f4b102 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -524,17 +524,18 @@ SMWRITE2(); \ } -#define GETGM() \ - gd = ((nextop & 0x38) >> 3); \ - mmx_forget_reg(dyn, ninst, gd); \ - gback = xEmu; \ +// Get GM, might use x1 as a scratch +#define GETGM() \ + gd = ((nextop & 0x38) >> 3); \ + mmx_forget_reg(dyn, ninst, x1, gd); \ + gback = xEmu; \ gdoffset = offsetof(x64emu_t, mmx[gd]) // Get EM, might use x3 #define GETEM(a, D, I12) \ if (MODREG) { \ ed = (nextop & 7); \ - mmx_forget_reg(dyn, ninst, ed); \ + mmx_forget_reg(dyn, ninst, a, ed); \ fixedaddress = offsetof(x64emu_t, mmx[ed]); \ wback = xEmu; \ } else { \ @@ -1292,6 +1293,8 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr); #define ymm_mark_zero STEPNAME(ymm_mark_zero) +#define mmx_get_reg_vector STEPNAME(mmx_get_reg_vector) +#define mmx_get_reg_empty_vector STEPNAME(mmx_get_reg_empty_vector) #define sse_get_reg_empty_vector STEPNAME(sse_get_reg_empty_vector) #define sse_get_reg_vector STEPNAME(sse_get_reg_vector) #define sse_forget_reg_vector STEPNAME(sse_forget_reg_vector) @@ -1510,10 +1513,14 @@ int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b); // MMX helpers // get float register for a MMX reg, create the entry if needed int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); +// get vector register for a MMX reg, create the entry if needed +int mmx_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); // get float register for a MMX reg, but don't try to synch it if it needed to be created int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); +// get vector register for a MMX reg, but don't try to synch it if it needed to be created +int mmx_get_reg_empty_vector(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); // forget float register for a MMX reg, create the entry if needed -void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a); +void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a); // SSE/SSE2 helpers // get float register for a SSE reg, create the entry if needed diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index b591ecee..0beaf11c 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -22,6 +22,7 @@ typedef struct instsize_s instsize_t; #define EXT_CACHE_XMMR 9 #define EXT_CACHE_YMMW 10 #define EXT_CACHE_YMMR 11 +#define EXT_CACHE_MMV 12 #define EXT_CACHE_OLD_SD 0 #define EXT_CACHE_OLD_SS 1 @@ -36,6 +37,14 @@ typedef union ext_cache_s { }; } ext_cache_t; +typedef union mmx_cache_s { + int8_t v; + struct { + uint8_t reg : 7; + uint8_t vector : 1; + }; +} mmx_cache_t; + typedef union sse_cache_s { int16_t v; struct { @@ -75,7 +84,7 @@ typedef struct extcache_s { int8_t x87cache[8]; // cache status for the 8 x87 register behind the fpu stack int8_t x87reg[8]; // reg used for x87cache entry int16_t tags; // similar to fpu_tags - int8_t mmxcache[8]; // cache status for the 8 MMX registers + mmx_cache_t mmxcache[8]; // cache status for the 8 MMX registers sse_cache_t ssecache[16]; // cache status for the 16 SSE(2) registers int8_t fpuused[32]; // all double reg from fpu, used by x87, mmx, sse and avx int8_t x87stack; // cache stack counter |