diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-07-18 17:39:48 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-18 11:39:48 +0200 |
| commit | 81f92d8f62d0ef3f4b934dc9e0bf6d3f9ea8da92 (patch) | |
| tree | 05e079923e15295d93b50a0ae79ca6ad28fb0e65 /src | |
| parent | 427051d04c81e0cd47e1ebe6891b3d625d778cc2 (diff) | |
| download | box64-81f92d8f62d0ef3f4b934dc9e0bf6d3f9ea8da92.tar.gz box64-81f92d8f62d0ef3f4b934dc9e0bf6d3f9ea8da92.zip | |
[RV64_DYNAREC] Added 66 0F 38 00 PSHUFB for vector (#1697)
Diffstat (limited to 'src')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 27 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 4 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 5 |

3 files changed, 32 insertions, 4 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index a2154076..deb95d26 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -58,7 +58,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = (nextop & 7) + (rex.b << 3); v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0); v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); - VOR_VV(v0, v1, v1, VECTOR_UNMASKED); + VMV_V_V(v0, v1); } else { SMREAD(); v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); @@ -66,6 +66,29 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VLE8_V(v0, ed, VECTOR_UNMASKED, VECTOR_NFIELD1); } break; + case 0x38: // SSSE3 opcodes + nextop = F8; + switch (nextop) { + case 0x00: + INST_NAME("PSHUFB Gx, Ex"); + nextop = F8; + // FIXME + vector_vsetvl_emul1(dyn, ninst, x1, VECTOR_SEW8); + + GETGX_vector(q0, 1); + GETEX_vector(q1, 0, 0); + v0 = fpu_get_scratch(dyn); + v1 = fpu_get_scratch(dyn); + ADDI(x4, xZR, 0b000010001111); + VMV_V_X(v0, x4); // broadcast the mask + VAND_VV(v0, v0, q1, VECTOR_UNMASKED); + VRGATHER_VV(v1, v0, q0, VECTOR_UNMASKED); // registers cannot be overlapped!! 
+ VMV_V_V(q0, v1); + break; + default: + DEFAULT_VECTOR; + } + break; case 0x6E: return 0; case 0x6F: @@ -77,7 +100,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); GETGX_empty_vector(v0); - VOR_VV(v0, v1, v1, VECTOR_UNMASKED); + VMV_V_V(v0, v1); } else { GETGX_empty_vector(v0); SMREAD(); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 5d389af3..4c132350 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -1959,7 +1959,7 @@ static void swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t * if (!cache->extcache[i].v) { // a mov is enough, no need to swap MESSAGE(LOG_DUMP, "\t - Moving %d <- %d\n", i, j); - VOR_VV(i, j, j, VECTOR_UNMASKED); + VMV_V_V(i, j); cache->extcache[i].v = cache->extcache[j].v; cache->extcache[j].v = 0; return; @@ -2024,7 +2024,7 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int int j = i + 1; while (cache->extcache[j].v) ++j; MESSAGE(LOG_DUMP, "\t - Moving away %d\n", i); - VOR_VV(j, i, i, VECTOR_UNMASKED); + VMV_V_V(j, i); cache->extcache[j].v = cache->extcache[i].v; } else if (cache->extcache[i].v) { int single = 0; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 7a565bd7..17cca67d 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -494,6 +494,11 @@ fixedaddress = 0; /* TODO: optimize this! */ \ } +// Get GX as a quad (might use x1) +#define GETGX_vector(a, w) \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ + a = sse_get_reg_vector(dyn, ninst, x1, gd, w) + // Get EX as a quad, (x1 is used) #define GETEX_vector(a, w, D) \ if (MODREG) { \ |