diff options
Diffstat (limited to 'target/riscv/vector_helper.c')
| -rw-r--r-- | target/riscv/vector_helper.c | 152 |
1 files changed, 129 insertions, 23 deletions
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index a96fc49c71..d224861c2c 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -127,6 +127,11 @@ static inline uint32_t vext_vta(uint32_t desc) return FIELD_EX32(simd_data(desc), VDATA, VTA); } +static inline uint32_t vext_vma(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VMA); +} + static inline uint32_t vext_vta_all_1s(uint32_t desc) { return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); @@ -278,14 +283,18 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, uint32_t esz = 1 << log2_esz; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); for (i = env->vstart; i < env->vl; i++, env->vstart++) { - if (!vm && !vext_elem_mask(v0, i)) { - continue; - } - k = 0; while (k < nf) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, + (i + k * max_elems + 1) * esz); + k++; + continue; + } target_ulong addr = base + stride * i + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; @@ -477,15 +486,19 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t esz = 1 << log2_esz; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); /* load bytes from guest memory */ for (i = env->vstart; i < env->vl; i++, env->vstart++) { - if (!vm && !vext_elem_mask(v0, i)) { - continue; - } - k = 0; while (k < nf) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, + (i + k * max_elems + 1) * esz); + k++; + continue; + } abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; @@ -574,6 +587,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t esz = 1 << log2_esz; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); target_ulong addr, offset, remain; /* probe every access*/ @@ -619,10 +633,14 @@ ProbeSuccess: } for (i = env->vstart; i < env->vl; i++) { k = 0; - if (!vm && !vext_elem_mask(v0, i)) { - continue; - } while (k < nf) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, + (i + k * max_elems + 1) * esz); + k++; + continue; + } target_ulong addr = base + ((i * nf + k) << log2_esz); ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); k++; @@ -812,10 +830,13 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, uint32_t vl = env->vl; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); uint32_t i; for (i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); continue; } fn(vd, vs1, vs2, i); @@ -878,10 +899,13 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, uint32_t vl = env->vl; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); uint32_t i; for (i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); continue; } fn(vd, s1, vs2, i); @@ -1274,10 +1298,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t esz = sizeof(TS1); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ @@ -1315,10 +1342,14 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ uint32_t total_elems = \ vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, \ + (i + 1) * esz); \ continue; \ } \ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ @@ -1373,12 +1404,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vl = env->vl; \ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + if (vma) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ continue; \ } \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ @@ -1431,11 +1467,16 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t vl = env->vl; \ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + if (vma) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ continue; \ } \ vext_set_elem_mask(vd, i, \ @@ -2088,10 +2129,12 @@ static inline void vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, CPURISCVState *env, uint32_t vl, uint32_t vm, int vxrm, - opivv2_rm_fn *fn) + opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) { for (uint32_t i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); continue; } fn(vd, vs1, vs2, i, env, vxrm); @@ -2109,23 +2152,24 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, uint32_t vl = env->vl; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); switch (env->vxrm) { case 0: /* rnu */ vext_vv_rm_1(vd, v0, vs1, vs2, - env, vl, vm, 0, fn); + env, vl, vm, 0, fn, vma, esz); break; case 1: /* rne */ vext_vv_rm_1(vd, v0, vs1, vs2, - env, vl, vm, 1, fn); + env, vl, vm, 1, fn, vma, esz); break; case 2: /* rdn */ vext_vv_rm_1(vd, v0, vs1, vs2, - env, vl, vm, 2, fn); + env, vl, vm, 2, fn, vma, esz); break; default: /* rod */ vext_vv_rm_1(vd, v0, vs1, vs2, - env, vl, vm, 3, fn); + env, vl, vm, 3, fn, vma, esz); break; } /* set tail elements to 1s */ @@ -2209,10 +2253,12 @@ static inline void vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, CPURISCVState *env, uint32_t vl, uint32_t vm, int vxrm, - opivx2_rm_fn *fn) + opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) { for (uint32_t i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); continue; } fn(vd, s1, vs2, i, env, vxrm); @@ -2230,23 +2276,24 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, uint32_t vl = env->vl; uint32_t total_elems = vext_get_total_elems(env, desc, esz); uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); switch (env->vxrm) { case 0: /* rnu */ vext_vx_rm_1(vd, v0, s1, vs2, - env, vl, vm, 0, fn); + env, vl, vm, 0, fn, vma, esz); break; case 1: /* rne */ vext_vx_rm_1(vd, v0, s1, vs2, - env, vl, vm, 1, fn); + env, vl, vm, 1, fn, vma, esz); break; case 2: /* rdn */ vext_vx_rm_1(vd, v0, s1, vs2, - env, vl, vm, 2, fn); + env, vl, vm, 2, fn, vma, esz); break; default: /* rod */ vext_vx_rm_1(vd, v0, s1, vs2, - env, vl, vm, 3, fn); + env, vl, vm, 3, fn, vma, esz); break; } /* set tail elements to 1s */ @@ -3004,10 +3051,14 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t total_elems = \ vext_get_total_elems(env, desc, ESZ); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * ESZ, \ + (i + 1) * ESZ); \ continue; \ } \ do_##NAME(vd, vs1, vs2, i, env); \ @@ -3043,10 +3094,14 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ uint32_t total_elems = \ vext_get_total_elems(env, desc, ESZ); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * ESZ, \ + (i + 1) * ESZ); \ continue; \ } \ do_##NAME(vd, s1, vs2, i, env); \ @@ -3618,6 +3673,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t total_elems = \ vext_get_total_elems(env, desc, ESZ); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ if (vl == 0) { \ @@ -3625,6 +3681,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ } \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * ESZ, \ + (i + 1) * ESZ); \ continue; \ } \ do_##NAME(vd, vs2, i, env); \ @@ -4135,12 +4194,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t vl = env->vl; \ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + if (vma) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ continue; \ } \ vext_set_elem_mask(vd, i, \ @@ -4168,11 +4232,16 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ uint32_t vl = env->vl; \ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ uint32_t vta_all_1s = vext_vta_all_1s(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + if (vma) { \ + vext_set_elem_mask(vd, i, 1); \ + } \ continue; \ } \ vext_set_elem_mask(vd, i, \ @@ -4295,10 +4364,14 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t total_elems = \ vext_get_total_elems(env, desc, ESZ); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * ESZ, \ + (i + 1) * ESZ); \ continue; \ } \ do_##NAME(vd, vs2, i); \ @@ -4806,11 +4879,16 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, uint32_t vl = env->vl; uint32_t total_elems = env_archcpu(env)->cfg.vlen; uint32_t vta_all_1s = vext_vta_all_1s(desc); + uint32_t vma = vext_vma(desc); int i; bool first_mask_bit = false; for (i = env->vstart; i < vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + if (vma) { + vext_set_elem_mask(vd, i, 1); + } continue; } /* write a zero to all following active elements */ @@ -4871,11 +4949,14 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t sum = 0; \ int i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ *((ETYPE *)vd + H(i)) = sum; \ @@ -4902,10 +4983,13 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ int i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ *((ETYPE *)vd + H(i)) = i; \ @@ -4934,11 +5018,14 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ target_ulong offset = s1, i_min, i; \ \ i_min = MAX(env->vstart, offset); \ for (i = i_min; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ @@ -4963,13 +5050,17 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ target_ulong i_max, i; \ \ i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ for (i = env->vstart; i < i_max; ++i) { \ - if (vm || vext_elem_mask(v0, i)) { \ - *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ + if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ + continue; \ } \ + *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ } \ \ for (i = i_max; i < vl; ++i) { \ @@ -4999,10 +5090,13 @@ static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ if (i == 0) { \ @@ -5044,10 +5138,13 @@ static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ if (i == vl - 1) { \ @@ -5115,11 +5212,14 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ uint32_t esz = sizeof(TS2); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint64_t index; \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ index = *((TS1 *)vs1 + HS1(i)); \ @@ -5155,11 +5255,14 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint64_t index = s1; \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ if (index >= vlmax) { \ @@ -5234,10 +5337,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ uint32_t esz = sizeof(ETYPE); \ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ uint32_t i; \ \ for (i = env->vstart; i < vl; i++) { \ if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ continue; \ } \ *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ |