diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-11 03:11:58 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-10 21:11:58 +0200 |
| commit | fc9900c8f6b29185f285c0f687d9a666206071d8 (patch) | |
| tree | fed26f37989e0abbc5e889e729588d73c16828cc /src | |
| parent | 629346b6a70833d0b2e3944abc67b1710875174d (diff) | |
| download | box64-fc9900c8f6b29185f285c0f687d9a666206071d8.tar.gz box64-fc9900c8f6b29185f285c0f687d9a666206071d8.zip | |
[RV64_DYNAREC] Added more 66 0F opcodes for vector (#1815)
* [RV64_DYNAREC] Added more 66 0F opcodes for vector
* [RV64_DYNAREC] Fixed PACKUSWB for vlen >= 256
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 67 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 2 |
2 files changed, 66 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index 08842f23..96d4ea89 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -36,6 +36,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i int q0, q1; int d0, d1, d2; int64_t fixedaddress, gdoffset; + uint32_t vtypei; int unscaled; MAYUSE(d0); MAYUSE(d1); @@ -123,6 +124,60 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i DEFAULT_VECTOR; } break; + case 0x61: + INST_NAME("PUNPCKLWD Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + ADDI(x1, xZR, 0b10101010); + VMV_V_X(VMASK, x1); // VMASK = 0b10101010 + v0 = fpu_get_scratch(dyn); + VIOTA_M(v0, VMASK, VECTOR_UNMASKED); // v0 = 3 3 2 2 1 1 0 0 + GETGX_vector(q0, 1, VECTOR_SEW16); + GETEX_vector(q1, 0, 0, VECTOR_SEW16); + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED); + VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED); + VMERGE_VVM(q0, d1, d0); + break; + case 0x67: + INST_NAME("PACKUSWB Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + GETGX_vector(q0, 1, VECTOR_SEW16); + GETEX_vector(q1, 0, 0, VECTOR_SEW16); + fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment! + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VMAX_VX(d0, xZR, q0, VECTOR_UNMASKED); + VMAX_VX(d1, xZR, q1, VECTOR_UNMASKED); + if (rv64_vlen >= 256) { + /* mu tu sew lmul=1 */ + vtypei = (0b0 << 7) | (0b0 << 6) | (VECTOR_SEW16 << 3) | 0b000; + ADDI(x1, xZR, 16); // double the vl for slideup. + VSETVLI(xZR, x1, vtypei); + VSLIDEUP_VI(d0, 8, d1, VECTOR_UNMASKED); // splice d0 and d1 here! 
+ } + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + VNCLIPU_WI(q0, 0, d0, VECTOR_UNMASKED); + break; + case 0x69: + INST_NAME("PUNPCKHWD Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + ADDI(x1, xZR, 0b10101010); + VMV_V_X(VMASK, x1); // VMASK = 0b10101010 + v0 = fpu_get_scratch(dyn); + VIOTA_M(v0, VMASK, VECTOR_UNMASKED); + VADD_VI(v0, 4, v0, VECTOR_UNMASKED); // v0 = 7 7 6 6 5 5 4 4 + GETGX_vector(q0, 1, VECTOR_SEW16); + GETEX_vector(q1, 0, 0, VECTOR_SEW16); + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VRGATHER_VV(d0, v0, q0, VECTOR_UNMASKED); + VRGATHER_VV(d1, v0, q1, VECTOR_UNMASKED); + VMERGE_VVM(q0, d1, d0); + break; case 0x6C: INST_NAME("PUNPCKLQDQ Gx, Ex"); nextop = F8; @@ -190,18 +245,26 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } else { SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); q0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, dyn->vector_eew); - GETEX_vector(q1, 0, 0, VECTOR_SEW8); + GETEX_vector(q1, 0, 0, dyn->vector_eew); VXOR_VV(q0, q0, q1, VECTOR_UNMASKED); } break; case 0xD4: - INST_NAME("PADDQ Gx,Ex"); + INST_NAME("PADDQ Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(q0, 1, VECTOR_SEW64); GETEX_vector(q1, 0, 0, VECTOR_SEW64); VADD_VV(q0, q0, q1, VECTOR_UNMASKED); break; + case 0xDB: + INST_NAME("PAND Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1); + GETGX_vector(q0, 1, dyn->vector_eew); + GETEX_vector(q1, 0, 0, dyn->vector_eew); + VAND_VV(q0, q0, q1, VECTOR_UNMASKED); + break; default: DEFAULT_VECTOR; } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 1206004c..75ad99a8 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -2601,7 +2601,7 @@ int vector_vsetvl_emul1(dynarec_rv64_t* dyn, int ninst, int s1, int sew) * sew: selected element width * lmul: vector register group multiplier * - * mu tu sew lmul=1 */ + * mu tu sew lmul=1 */ 
uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | 0b000; ADDI(s1, xZR, 16 >> sew); VSETVLI(xZR, s1, vtypei); |