diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-08-26 01:52:50 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-25 19:52:50 +0200 |
| commit | 4988fb27dc115e89146a017d0dff2a33abbc25e1 (patch) | |
| tree | 6b6995104979e923a19ed56a71d55e9bf92683fc /src | |
| parent | db1f0825ce26c1c49f61c01072598c52bbe9d6bc (diff) | |
| download | box64-4988fb27dc115e89146a017d0dff2a33abbc25e1.tar.gz box64-4988fb27dc115e89146a017d0dff2a33abbc25e1.zip | |
[RV64_DYNAREC] Fixed more issues in the vector infrastructure (#1755)
* [RV64_DYNAREC] Fixed SEW transformation for vector
* more tweaks
* more fixes
* More fixes
* more fixes
* re-enable vector extension by default
Diffstat (limited to 'src')
| -rw-r--r-- | src/core.c | 8 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 26 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 56 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 21 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 25 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass3.h | 16 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 4 |
9 files changed, 86 insertions, 73 deletions
diff --git a/src/core.c b/src/core.c index 5e345568..03859f12 100644 --- a/src/core.c +++ b/src/core.c @@ -512,13 +512,7 @@ HWCAP2_AFP if(rv64_zbb) printf_log(LOG_INFO, " Zbb"); if(rv64_zbc) printf_log(LOG_INFO, " Zbc"); if(rv64_zbs) printf_log(LOG_INFO, " Zbs"); - if (rv64_vector) { - char* p = getenv("BOX64_DYNAREC_RV64VEXT"); - if (p != NULL && p[0] == '1') - printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen); - else - rv64_vector = 0; - } + if (rv64_vector) printf_log(LOG_INFO, " Vector (vlen: %d)", rv64_vlen); if(rv64_xtheadba) printf_log(LOG_INFO, " XTheadBa"); if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb"); if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs"); diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 6a5c4977..c9de4b8f 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -44,7 +44,7 @@ #define OTHER_CACHE() \ if (fpuCacheNeedsTransform(dyn, ninst)) ret |= 2; \ - if (sewNeedsTransform(dyn, ninst)) ret |= 3; + if (sewNeedsTransform(dyn, ninst)) ret |= 4; #include "rv64/rv64_printer.h" #include "rv64/dynarec_rv64_private.h" diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index df823f23..36b629a7 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -51,17 +51,17 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i INST_NAME("MOVAPD Gx, Ex"); nextop = F8; GETG; - SET_ELEMENT_WIDTH(x1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); if (MODREG) { ed = (nextop & 7) + (rex.b << 3); - v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW8); + v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, dyn->vector_eew); v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); VMV_V_V(v0, v1); } else { SMREAD(); v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); - VLE8_V(v0, ed, 
VECTOR_UNMASKED, VECTOR_NFIELD1); + VLE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } break; case 0x38: // SSSE3 opcodes @@ -70,7 +70,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x00: INST_NAME("PSHUFB Gx, Ex"); nextop = F8; - SET_ELEMENT_WIDTH(x1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); v0 = fpu_get_scratch(dyn); @@ -87,21 +87,21 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x08 ... 0x0a: if (nextop == 0x08) { INST_NAME("PSIGNB Gx, Ex"); - SET_ELEMENT_WIDTH(x1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); i32 = 7; nextop = F8; GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); } else if (nextop == 0x09) { INST_NAME("PSIGNW Gx, Ex"); - SET_ELEMENT_WIDTH(x1, VECTOR_SEW16); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); i32 = 15; nextop = F8; GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); } else { INST_NAME("PSIGND Gx, Ex"); - SET_ELEMENT_WIDTH(x1, VECTOR_SEW32); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); i32 = 31; nextop = F8; GETGX_vector(q0, 1, VECTOR_SEW32); @@ -128,16 +128,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x6F: INST_NAME("MOVDQA Gx, Ex"); nextop = F8; - SET_ELEMENT_WIDTH(x1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); if (MODREG) { - v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW8); + v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew); GETGX_empty_vector(v0); VMV_V_V(v0, v1); } else { GETGX_empty_vector(v0); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); - VLE8_V(v0, ed, VECTOR_UNMASKED, VECTOR_NFIELD1); + VLE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } break; case 0x7E: @@ -147,13 +147,13 @@ uintptr_t 
dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i nextop = F8; GETG; if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { - SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY); + SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); // special case q0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { - SET_ELEMENT_WIDTH(x1, VECTOR_SEW8); - q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); + q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew); GETEX_vector(q1, 0, 0, VECTOR_SEW8); VXOR_VV(q0, q0, q1, VECTOR_UNMASKED); } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 0ec15c43..6c86d94a 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -1732,9 +1732,9 @@ void sse_forget_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a) if (dyn->e.ssecache[a].vector == 0) return sse_forget_reg(dyn, ninst, s1, a); if (dyn->e.extcache[EXTIDX(dyn->e.ssecache[a].reg)].t == EXT_CACHE_XMMW) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 1); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a])); - VSE8_V(dyn->e.ssecache[a].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VSE_V(dyn->e.ssecache[a].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } fpu_free_reg(dyn, dyn->e.ssecache[a].reg); dyn->e.olds[a].changed = 0; @@ -1756,9 +1756,9 @@ void sse_purge07cache(dynarec_rv64_t* dyn, int ninst, int s1) ++old; } if (dyn->e.ssecache[i].vector) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i])); - VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } else if (dyn->e.ssecache[i].single) FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); else @@ -1782,9 +1782,11 @@ static void 
sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) ++old; } if (dyn->e.ssecache[i].vector) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); - ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i])); - VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + if (dyn->e.ssecache[i].write) { + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); + ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i])); + VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); + } } else if (dyn->e.ssecache[i].single) FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); else @@ -1808,9 +1810,9 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) for (int i = 0; i < 16; ++i) if (dyn->e.ssecache[i].v != -1) { if (dyn->e.ssecache[i].vector) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i])); - VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } else if (dyn->e.ssecache[i].single) FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); else @@ -1823,9 +1825,9 @@ void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) if (dyn->e.ssecache[a].v == -1) return; if (dyn->e.ssecache[a].vector) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a])); - VSE8_V(dyn->e.ssecache[a].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VSE_V(dyn->e.ssecache[a].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } else if (dyn->e.ssecache[a].single) FSW(dyn->e.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a])); else @@ -1847,9 +1849,9 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) for (int i=start; i<8; ++i) if(dyn->e.ssecache[i].v!=-1) { if (dyn->e.ssecache[i].vector) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, 
offsetof(x64emu_t, xmm[i])); - VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } else if (dyn->e.ssecache[i].single) FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); else @@ -1894,9 +1896,9 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) for (int i=start; i<8; ++i) if(dyn->e.ssecache[i].v!=-1) { if (dyn->e.ssecache[i].vector) { - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i])); - VLE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VLE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } else if (dyn->e.ssecache[i].single) FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); else @@ -2077,9 +2079,9 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int case EXT_CACHE_XMMR: case EXT_CACHE_XMMW: MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n])); - VLE8_V(reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VLE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); break; case EXT_CACHE_SS: MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); @@ -2137,9 +2139,9 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i break; case EXT_CACHE_XMMW: MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); - SET_ELEMENT_WIDTH(s1, VECTOR_SEW8); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n])); - VSE8_V(reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1); + VSE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); break; case EXT_CACHE_SS: MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); @@ -2292,6 +2294,15 @@ static void fpuCacheTransform(dynarec_rv64_t* 
dyn, int ninst, int s1, int s2, in FMVXD(s1, EXTREG(i)); FCVTDL(EXTREG(i), s1, RD_RTZ); cache.extcache[i].t = EXT_CACHE_ST_D; + } else if (cache.extcache[i].t == EXT_CACHE_XMMR && cache_i2.extcache[i].t == EXT_CACHE_XMMW) { + cache.extcache[i].t = EXT_CACHE_XMMW; + } else if (cache.extcache[i].t == EXT_CACHE_XMMW && cache_i2.extcache[i].t == EXT_CACHE_XMMR) { + // refresh cache... + MESSAGE(LOG_DUMP, "\t - Refreh %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); + SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); + ADDI(s1, xEmu, offsetof(x64emu_t, xmm[cache.extcache[i].n])); + VSE_V(EXTREG(i), s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); + cache.extcache[i].t = EXT_CACHE_XMMR; } } } @@ -2378,7 +2389,7 @@ static void sewTransform(dynarec_rv64_t* dyn, int ninst, int s1) } void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) { - if (cacheupd & 3) + if (cacheupd & 4) sewTransform(dyn, ninst, s1); if (cacheupd & 2) fpuCacheTransform(dyn, ninst, s1, s2, s3); @@ -2532,9 +2543,9 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst) // Use vector extension as like SIMD for now, this function sets the specified element width, // other configs are set automatically. 
-void vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew) +int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew) { - if (sew == VECTOR_SEWNA) return; + if (sew == VECTOR_SEWNA) return VECTOR_SEW8; if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8; /* mu: mask undisturbed * tu: tail undisturbed @@ -2545,4 +2556,5 @@ void vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew) uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | 0b000; ADDI(s1, xZR, 16 >> sew); VSETVLI(xZR, s1, vtypei); + return sew; } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index acd9875d..db71985d 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1081,15 +1081,16 @@ #define MODREG ((nextop & 0xC0) == 0xC0) #ifndef SET_ELEMENT_WIDTH -#define SET_ELEMENT_WIDTH(s1, sew) \ - do { \ - if (sew == VECTOR_SEWNA) { \ - } else if (sew == VECTOR_SEWANY && dyn->vector_sew != VECTOR_SEWNA) { \ - } else if (sew == dyn->vector_sew) { \ - } else { \ - vector_vsetvl_emul1(dyn, ninst, s1, sew); \ - } \ - dyn->vector_sew = sew; \ +#define SET_ELEMENT_WIDTH(s1, sew, set) \ + do { \ + if (sew == VECTOR_SEWANY && dyn->vector_sew != VECTOR_SEWNA) { \ + dyn->vector_eew = dyn->vector_sew; \ + } else if (sew == dyn->vector_sew) { \ + dyn->vector_eew = dyn->vector_sew; \ + } else { \ + dyn->vector_eew = vector_vsetvl_emul1(dyn, ninst, s1, sew); \ + } \ + if (set) dyn->vector_sew = dyn->vector_eew; \ } while (0) #endif @@ -1440,7 +1441,7 @@ void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val); void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup); -void vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew); +int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew); #if STEP < 2 #define CHECK_CACHE() 0 diff --git 
a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 90d383ee..782dae0b 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -56,21 +56,10 @@ dynarec_log(LOG_NONE, "\n"); \ } -#define DEFAULT_VECTOR \ - if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) { \ - dynarec_log(LOG_NONE, "%p: Dynarec fallback to scalar version because of %s Opcode" \ - " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ - (void*)ip, rex.is32bits ? "x86 " : "x64 ", \ - PKip(0), \ - PKip(1), PKip(2), PKip(3), \ - PKip(4), PKip(5), PKip(6), \ - PKip(7), PKip(8), PKip(9), \ - PKip(10), PKip(11), PKip(12), \ - PKip(13), PKip(14)); \ - printFunctionAddr(ip, " => "); \ - dynarec_log(LOG_NONE, "\n"); \ - } \ - return 0 - -#define SET_ELEMENT_WIDTH(s1, sew) \ - dyn->vector_sew = sew; +#define SET_ELEMENT_WIDTH(s1, sew, set) \ + do { \ + if (sew != VECTOR_SEWANY && set) \ + dyn->vector_sew = sew; \ + else if (dyn->vector_sew == VECTOR_SEWNA && set) \ + dyn->vector_sew = VECTOR_SEW8; \ + } while (0) diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h index 1dce2bc4..5dc088a2 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass3.h +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -25,3 +25,19 @@ #define TABLE64(A, V) {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));} #define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); AUIPC(x1, SPLIT20(val64offset)); FLD(A, x1, SPLIT12(val64offset));} + +#define DEFAULT_VECTOR \ + if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) { \ + dynarec_log(LOG_NONE, "%p: Dynarec fallback to scalar version because of %s Opcode" \ + " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X 
%02X %02X", \ + (void*)ip, rex.is32bits ? "x86 " : "x64 ", \ + PKip(0), \ + PKip(1), PKip(2), PKip(3), \ + PKip(4), PKip(5), PKip(6), \ + PKip(7), PKip(8), PKip(9), \ + PKip(10), PKip(11), PKip(12), \ + PKip(13), PKip(14)); \ + printFunctionAddr(ip, " => "); \ + dynarec_log(LOG_NONE, "\n"); \ + } \ + return 0 diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index fa02ab9d..61737deb 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -154,6 +154,7 @@ typedef struct dynarec_rv64_s { uint8_t always_test; uint8_t abort; uint8_t vector_sew; + uint8_t vector_eew; // effective element width } dynarec_rv64_t; // convert idx (0..24) to reg index (10..31 0..1) diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 55384ed7..fa27dd8b 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -1256,8 +1256,8 @@ f28–31 ft8–11 FP temporaries Caller #define VSE32_V(vs3, rs1, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, 0b110, vs3, 0b0100111)) // ...000.00000.....110.....0100111 #define VSE64_V(vs3, rs1, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, 0b111, vs3, 0b0100111)) // ...000.00000.....111.....0100111 -#define VLE_V(vd, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vd, 0b0000111)) -#define VSE_V(vd, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vs3, 0b0100111)) +#define VLE_V(vd, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vd, 0b0000111)) +#define VSE_V(vs3, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vs3, 0b0100111)) // Vector Indexed-Unordered Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions |