| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-07-23 03:45:54 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-22 21:45:54 +0200 |
| commit | 9c1b015689e965fe117b13ee217345ea22c130ce (patch) | |
| tree | 4a6b1030ed97f05878f9bd92250b2a81c57d1c39 /src | |
| parent | 3b1fbef95ca3f6842fe4edc8f7c12f776eb0dccb (diff) | |
[RV64_DYNAREC] Fixed vector infra (#1705)
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 35 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 33 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 12 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 3 |
4 files changed, 51 insertions, 32 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 257d07b0..df823f23 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -51,17 +51,17 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("MOVAPD Gx, Ex");
             nextop = F8;
             GETG;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY);
                 ed = (nextop & 7) + (rex.b << 3);
-                v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW8);
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 VMV_V_V(v0, v1);
             } else {
                 SMREAD();
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
-                VL1RE64_V(v0, ed);
+                VLE8_V(v0, ed, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
         case 0x38: // SSSE3 opcodes
@@ -71,8 +71,8 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     INST_NAME("PSHUFB Gx, Ex");
                     nextop = F8;
                     SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
-                    GETGX_vector(q0, 1);
-                    GETEX_vector(q1, 0, 0);
+                    GETGX_vector(q0, 1, VECTOR_SEW8);
+                    GETEX_vector(q1, 0, 0, VECTOR_SEW8);
                     v0 = fpu_get_scratch(dyn);
                     v1 = fpu_get_scratch(dyn);
                     ADDI(x4, xZR, 0b000010001111);
@@ -89,18 +89,24 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                         INST_NAME("PSIGNB Gx, Ex");
                         SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
                         i32 = 7;
+                        nextop = F8;
+                        GETGX_vector(q0, 1, VECTOR_SEW8);
+                        GETEX_vector(q1, 0, 0, VECTOR_SEW8);
                     } else if (nextop == 0x09) {
                         INST_NAME("PSIGNW Gx, Ex");
                         SET_ELEMENT_WIDTH(x1, VECTOR_SEW16);
                         i32 = 15;
+                        nextop = F8;
+                        GETGX_vector(q0, 1, VECTOR_SEW16);
+                        GETEX_vector(q1, 0, 0, VECTOR_SEW16);
                     } else {
                         INST_NAME("PSIGND Gx, Ex");
                         SET_ELEMENT_WIDTH(x1, VECTOR_SEW32);
                         i32 = 31;
+                        nextop = F8;
+                        GETGX_vector(q0, 1, VECTOR_SEW32);
+                        GETEX_vector(q1, 0, 0, VECTOR_SEW32);
                     }
-                    nextop = F8;
-                    GETGX_vector(q0, 1);
-                    GETEX_vector(q1, 0, 0);
                     v0 = fpu_get_scratch(dyn);
                     v1 = fpu_get_scratch(dyn);
                     // absolute
@@ -122,16 +128,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x6F:
             INST_NAME("MOVDQA Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY);
-                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW8);
                 GETGX_empty_vector(v0);
                 VMV_V_V(v0, v1);
             } else {
                 GETGX_empty_vector(v0);
                 SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
-                VL1RE64_V(v0, ed);
+                VLE8_V(v0, ed, VECTOR_UNMASKED, VECTOR_NFIELD1);
             }
             break;
         case 0x7E:
@@ -139,15 +145,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0xEF:
             INST_NAME("PXOR Gx, Ex");
             nextop = F8;
-            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY);
             GETG;
             if (MODREG && gd == (nextop & 7) + (rex.b << 3)) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY);
                 // special case
                 q0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
                 VXOR_VV(q0, q0, q0, VECTOR_UNMASKED);
            } else {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
-                q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1);
-                GETEX_vector(q1, 0, 0);
+                q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW8);
+                GETEX_vector(q1, 0, 0, VECTOR_SEW8);
                 VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);
             }
             break;
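The recurring edit in this file swaps the whole-register load vl1re64.v (VL1RE64_V) for a unit-stride byte load vle8.v (VLE8_V). The distinction matters: a whole-register load always moves VLEN bits and ignores vl/vtype, which only coincides with the 128-bit width of an XMM value when VLEN is exactly 128, while vle8.v moves exactly vl elements at the SEW programmed by SET_ELEMENT_WIDTH. A minimal sketch of the pattern, assuming SET_ELEMENT_WIDTH sets vl to cover the 16 bytes of an XMM register (the helper name is hypothetical; the macros are the ones used above):

```c
// Hypothetical helper showing the load pattern this commit standardizes on.
static void load_xmm_bytes(dynarec_rv64_t* dyn, int ninst, int s1, int vreg, int a)
{
    SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);                 // vsetvli: SEW=8, vl = 16 elements
    ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));         // s1 = &emu->xmm[a]
    VLE8_V(vreg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);  // vle8.v vreg, (s1): exactly 16 bytes
}
```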
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 3229b3da..2a28a0bf 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -1653,13 +1653,13 @@ void sse_forget_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
 }
 
 // get rvv register for a SSE reg, create the entry if needed
-int sse_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a, int forwrite)
+int sse_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a, int forwrite, int sew)
 {
     if (dyn->e.ssecache[a].v != -1) {
         if (dyn->e.ssecache[a].vector == 0) {
             // it's in the fpu, forget it first...
             sse_forget_reg(dyn, ninst, s1, a);
-            return sse_get_reg_vector(dyn, ninst, s1, a, forwrite);
+            return sse_get_reg_vector(dyn, ninst, s1, a, forwrite, sew);
         }
         if (forwrite) {
@@ -1675,7 +1675,7 @@ int sse_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a, int forwri
     dyn->e.ssecache[a].vector = 1;
     dyn->e.ssecache[a].single = 0; // just to be clean
     ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
-    VL1RE64_V(ret, s1);
+    VLE_V(ret, s1, sew, VECTOR_UNMASKED, VECTOR_NFIELD1);
     return ret;
 }
@@ -1709,8 +1709,9 @@ void sse_forget_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a)
     if (dyn->e.ssecache[a].vector == 0) return sse_forget_reg(dyn, ninst, s1, a);
     if (dyn->e.extcache[EXTIDX(dyn->e.ssecache[a].reg)].t == EXT_CACHE_XMMW) {
+        SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
         ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
-        VS1R_V(dyn->e.ssecache[a].reg, s1);
+        VSE8_V(dyn->e.ssecache[a].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
     }
     fpu_free_reg(dyn, dyn->e.ssecache[a].reg);
     dyn->e.ssecache[a].v = -1;
@@ -1728,8 +1729,9 @@ void sse_purge07cache(dynarec_rv64_t* dyn, int ninst, int s1)
             ++old;
         }
         if (dyn->e.ssecache[i].vector) {
+            SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-            VS1R_V(dyn->e.ssecache[i].reg, s1);
+            VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
         } else if (dyn->e.ssecache[i].single)
             FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
         else
@@ -1753,8 +1755,9 @@ static void sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1)
             ++old;
         }
         if (dyn->e.ssecache[i].vector) {
+            SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-            VS1R_V(dyn->e.ssecache[i].reg, s1);
+            VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
         } else if (dyn->e.ssecache[i].single)
             FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
         else
@@ -1783,8 +1786,9 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1)
     for (int i = 0; i < 16; ++i)
         if (dyn->e.ssecache[i].v != -1) {
             if (dyn->e.ssecache[i].vector) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
                 ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                VS1R_V(dyn->e.ssecache[i].reg, s1);
+                VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
             } else if (dyn->e.ssecache[i].single)
                 FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
             else
@@ -1797,8 +1801,9 @@ void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a)
     if (dyn->e.ssecache[a].v == -1) return;
     if (dyn->e.ssecache[a].vector) {
+        SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
         ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
-        VS1R_V(dyn->e.ssecache[a].reg, s1);
+        VSE8_V(dyn->e.ssecache[a].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
     } else if (dyn->e.ssecache[a].single)
         FSW(dyn->e.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a]));
     else
@@ -1820,8 +1825,9 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
     for (int i=start; i<8; ++i)
         if(dyn->e.ssecache[i].v!=-1) {
             if (dyn->e.ssecache[i].vector) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
                 ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                VS1R_V(dyn->e.ssecache[i].reg, s1);
+                VSE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
             } else if (dyn->e.ssecache[i].single)
                 FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
             else
@@ -1866,8 +1872,9 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
     for (int i=start; i<8; ++i)
         if(dyn->e.ssecache[i].v!=-1) {
             if (dyn->e.ssecache[i].vector) {
+                SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
                 ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i]));
-                VL1RE64_V(dyn->e.ssecache[i].reg, s1);
+                VLE8_V(dyn->e.ssecache[i].reg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
             } else if (dyn->e.ssecache[i].single)
                 FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
             else
@@ -2046,8 +2053,9 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int
         case EXT_CACHE_XMMR:
         case EXT_CACHE_XMMW:
             MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n));
+            SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n]));
-            VL1RE64_V(i, s1);
+            VLE8_V(i, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
             break;
         case EXT_CACHE_SS:
             MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n));
@@ -2105,8 +2113,9 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
             break;
         case EXT_CACHE_XMMW:
             MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n));
+            SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
             ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n]));
-            VS1R_V(i, s1);
+            VSE8_V(i, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
             break;
         case EXT_CACHE_SS:
             MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n));
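The helper-side changes follow one rule: whenever a cached XMM register is spilled back to emu->xmm, the code first pins SEW to 8-bit and stores with vse8.v, so exactly 16 bytes are written regardless of the width the register was last used at; reloads in sse_get_reg_vector instead take the caller's sew and use the width-generic VLE_V. A condensed sketch of the two paths (the wrapper names are hypothetical; the macro calls mirror the patch):

```c
// Spill path: the purge/reflect sites run between translated instructions,
// so re-programming vtype to SEW=8 here is safe, and the byte store writes
// the full 16-byte XMM slot no matter what SEW the register was used at.
static void spill_xmm(dynarec_rv64_t* dyn, int ninst, int s1, int vreg, int a)
{
    SET_ELEMENT_WIDTH(s1, VECTOR_SEW8);
    ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
    VSE8_V(vreg, s1, VECTOR_UNMASKED, VECTOR_NFIELD1);
}

// Reload path: the caller has already programmed SEW, so the load only has
// to match it; VLE_V picks the vle8/16/32/64 encoding from the sew argument.
static void reload_xmm(dynarec_rv64_t* dyn, int ninst, int s1, int vreg, int a, int sew)
{
    ADDI(s1, xEmu, offsetof(x64emu_t, xmm[a]));
    VLE_V(vreg, s1, sew, VECTOR_UNMASKED, VECTOR_NFIELD1);
}
```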
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index e3c3bbf2..16098141 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -495,20 +495,20 @@
 }
 
 // Get GX as a quad (might use x1)
-#define GETGX_vector(a, w)                          \
+#define GETGX_vector(a, w, sew)                     \
     gd = ((nextop & 0x38) >> 3) + (rex.r << 3);     \
-    a = sse_get_reg_vector(dyn, ninst, x1, gd, w)
+    a = sse_get_reg_vector(dyn, ninst, x1, gd, w, sew)
 
 // Get EX as a quad, (x1 is used)
-#define GETEX_vector(a, w, D)                                                                   \
+#define GETEX_vector(a, w, D, sew)                                                              \
     if (MODREG) {                                                                               \
-        a = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w);                 \
+        a = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w, sew);            \
     } else {                                                                                    \
         SMREAD();                                                                               \
         addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, D);    \
         a = fpu_get_scratch(dyn);                                                               \
         ADDI(x2, ed, fixedaddress);                                                             \
-        VL1RE64_V(a, x2);                                                                       \
+        VLE_V(a, x2, sew, VECTOR_UNMASKED, VECTOR_NFIELD1);                                     \
     }
 
 #define GETGM()                                     \
@@ -1486,7 +1486,7 @@ void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a);
 // get float register for a SSE reg, create the entry if needed
 int sse_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single);
 // get rvv register for a SSE reg, create the entry if needed
-int sse_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a, int forwrite);
+int sse_get_reg_vector(dynarec_rv64_t* dyn, int ninst, int s1, int a, int forwrite, int sew);
 // get float register for a SSE reg, but don't try to synch it if it needed to be created
 int sse_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single);
 // get rvv register for an SSE reg, but don't try to synch it if it needed to be created
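Callers now thread the same SEW through SET_ELEMENT_WIDTH and the operand macros. The PXOR path in the first diff above is a complete example of the intended shape:

```c
// Taken from the PXOR case in dynarec_rv64_660f_vector.c; the sew passed to
// the macros must agree with the SET_ELEMENT_WIDTH that precedes them.
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW8); // Gx, for write
GETEX_vector(q1, 0, 0, VECTOR_SEW8);                         // Ex, read-only, no extra displacement
VXOR_VV(q0, q0, q1, VECTOR_UNMASKED);                        // vxor.vv q0, q0, q1
```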
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 959308fb..abf718df 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1256,6 +1256,9 @@ f28–31 ft8–11 FP temporaries Caller
 #define VSE32_V(vs3, rs1, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, 0b110, vs3, 0b0100111)) // ...000.00000.....110.....0100111
 #define VSE64_V(vs3, rs1, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, 0b111, vs3, 0b0100111)) // ...000.00000.....111.....0100111
 
+#define VLE_V(vd, rs1, sew, vm, nf)  EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vd, 0b0000111))
+#define VSE_V(vs3, rs1, sew, vm, nf) EMIT(I_type(((nf) << 9) | (vm << 5), rs1, (sew == 0b000 ? 0b000 : (0b100 | sew)), vs3, 0b0100111))
+
 // Vector Indexed-Unordered Instructions (including segment part)
 // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions
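The two new generic macros fold the four unit-stride widths into one encoder. Assuming VECTOR_SEW8 through VECTOR_SEW64 encode as 0 through 3 (which the ternary implies), the width (funct3) field they compute matches the RVV unit-stride load/store encodings. A standalone sketch of just that mapping:

```c
#include <stdint.h>
#include <stdio.h>

// Width field for vle{8,16,32,64}.v / vse{8,16,32,64}.v, as computed by the
// ternary in VLE_V/VSE_V: 0b000 for 8-bit elements, 0b100|sew otherwise.
static uint32_t unit_stride_width(uint32_t sew)
{
    return (sew == 0u) ? 0u : (0x4u | sew);
}

int main(void)
{
    // Expected: sew=0 -> 0 (0b000, vle8.v), sew=1 -> 5 (0b101, vle16.v),
    //           sew=2 -> 6 (0b110, vle32.v), sew=3 -> 7 (0b111, vle64.v)
    for (uint32_t sew = 0; sew < 4; ++sew)
        printf("sew=%u -> width=%u\n", sew, unit_stride_width(sew));
    return 0;
}
```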