| field | value | date |
|---|---|---|
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-17 01:38:37 +0800 |
| committer | GitHub <noreply@github.com> | 2024-09-16 19:38:37 +0200 |
| commit | b8798c580f570c9b75bcaddc285d0a449e2c67a0 (patch) | |
| tree | f775cd6323d4f5891a98f01f825c1a6472536bfd | |
| parent | 2b58e8a253afe59539a1413a4a86b126c1fc72ed (diff) | |
[RV64_DYNAREC] Added more opcodes for vector (#1830)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 184 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 4 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
3 files changed, 187 insertions, 3 deletions
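For context (not part of the patch): the new cases implement the SSE4.1 PMOVSX\*/PMOVZX\* packed sign- and zero-extension moves, which the dynarec lowers to RVV widening adds against x0 (VWADD.VX / VWADDU.VX). Below is a minimal scalar reference model of the byte-to-word pair only, assuming 128-bit registers represented as byte arrays; the function names are illustrative and do not appear in the box64 sources.

```c
#include <stdint.h>
#include <string.h>

/* Illustrative scalar model of PMOVSXBW: widen the low 8 bytes of src
 * into 8 sign-extended 16-bit words in dst. */
void pmovsxbw_ref(uint8_t dst[16], const uint8_t src[16])
{
    int16_t out[8];
    for (int i = 0; i < 8; ++i)
        out[i] = (int16_t)(int8_t)src[i]; /* sign-extend byte -> word */
    memcpy(dst, out, 16);
}

/* Illustrative scalar model of PMOVZXBW: same, but zero-extended. */
void pmovzxbw_ref(uint8_t dst[16], const uint8_t src[16])
{
    uint16_t out[8];
    for (int i = 0; i < 8; ++i)
        out[i] = (uint16_t)src[i]; /* zero-extend byte -> word */
    memcpy(dst, out, 16);
}

int main(void)
{
    uint8_t src[16] = { 0x80, 0x7f, 1, 2, 3, 4, 5, 6 }; /* only the low 8 bytes are read */
    uint8_t dst[16];
    pmovsxbw_ref(dst, src); /* first word becomes 0xff80 */
    pmovzxbw_ref(dst, src); /* first word becomes 0x0080 */
    return 0;
}
```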
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 0d25ecb0..5618e0b2 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -197,6 +197,190 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             VXOR_VV(q0, q1, v0, VECTOR_UNMASKED);
             VSUB_VV(q0, v0, q0, VECTOR_UNMASKED);
             break;
+        case 0x20:
+            INST_NAME("PMOVSXBW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.5);
+            VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x21:
+            INST_NAME("PMOVSXBD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.25);
+            VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
+            VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            VMV_V_V(q0, v1);
+            break;
+        case 0x22:
+            INST_NAME("PMOVSXBQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.125);
+            VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
+            VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
+            VWADD_VX(v0, xZR, v1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x23:
+            INST_NAME("PMOVSXWD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
+            VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x24:
+            INST_NAME("PMOVSXWQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
+            VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
+            VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            VMV_V_V(q0, v1);
+            break;
+        case 0x25:
+            INST_NAME("PMOVSXDQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
+            VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x30:
+            INST_NAME("PMOVZXBW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.5);
+            VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x31:
+            INST_NAME("PMOVZXBD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.25);
+            VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
+            VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            VMV_V_V(q0, v1);
+            break;
+        case 0x32:
+            INST_NAME("PMOVZXBQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.125);
+            VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
+            VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
+            VWADDU_VX(v0, xZR, v1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x33:
+            INST_NAME("PMOVZXWD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
+            VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            VMV_V_V(q0, v0);
+            break;
+        case 0x34:
+            INST_NAME("PMOVZXWQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
+            VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
+            VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            VMV_V_V(q0, v1);
+            break;
+        case 0x35:
+            INST_NAME("PMOVZXDQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_empty_vector(q0);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
+            v0 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
+            VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            VMV_V_V(q0, v0);
+            break;
         default:
             DEFAULT_VECTOR;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 4fafa883..7c384fc8 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2594,7 +2594,7 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
 }
 
 // Simple wrapper for vsetvli
-int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, int multiple)
+int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple)
 {
     if (sew == VECTOR_SEWNA) return VECTOR_SEW8;
     if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8;
@@ -2605,7 +2605,7 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, i
      *
      * mu tu sew lmul */
     uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul;
-    ADDI(s1, xZR, (16 >> sew) * multiple); // TODO: it's possible to reuse s1 sometimes
+    ADDI(s1, xZR, (int)((float)(16 >> sew) * multiple)); // TODO: it's possible to reuse s1 sometimes
     VSETVLI(xZR, s1, vtypei);
     return sew;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index b12c1e00..0d1c3f7e 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1442,7 +1442,7 @@ void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
 void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);
 
-int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, int multiple);
+int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple);
 
 #if STEP < 2
 #define CHECK_CACHE() 0
```
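A note on the helper change (my reading, not stated in the patch): `multiple` becomes a float because the source of a widening move occupies only a fraction of the 128-bit register, so vl must be a fraction of the element count that fits at the current SEW. The value loaded into s1 by `vector_vsetvli` reduces to the small computation below; the helper name is illustrative, only the formula comes from the patch.

```c
#include <stdio.h>

/* vl requested by vector_vsetvli after this patch:
 * (16 >> sew) = elements of that width in 128 bits
 * (sew: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit). */
static int vsetvli_avl(int sew, float multiple)
{
    return (int)((float)(16 >> sew) * multiple);
}

int main(void)
{
    printf("PMOVSXBW, SEW8  x 0.5   -> vl = %d\n", vsetvli_avl(0, 0.5f));   /* 8 */
    printf("PMOVSXBD, SEW8  x 0.25  -> vl = %d\n", vsetvli_avl(0, 0.25f));  /* 4 */
    printf("PMOVSXBQ, SEW8  x 0.125 -> vl = %d\n", vsetvli_avl(0, 0.125f)); /* 2 */
    return 0;
}
```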