diff options
Diffstat (limited to 'target/arm/tcg/sve_helper.c')
| -rw-r--r-- | target/arm/tcg/sve_helper.c | 109 |
1 files changed, 96 insertions, 13 deletions
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 43b872c7fd..803f0a094d 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -4484,33 +4484,35 @@ static TYPE FUNC##_reduce(TYPE *data, float_status *status, uintptr_t n) \ } \ } \ uint64_t helper_sve_##NAME##v_##SUF(void *vn, void *vg, \ - float_status *s, uint32_t desc) \ + float_status *status, uint32_t desc) \ { \ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \ + TYPE ident = IDENT; \ for (i = 0; i < oprsz; ) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ do { \ TYPE nn = *(TYPE *)(vn + H(i)); \ - *(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \ + *(TYPE *)((void *)data + i) = (pg & 1 ? nn : ident); \ i += sizeof(TYPE), pg >>= sizeof(TYPE); \ } while (i & 15); \ } \ for (; i < maxsz; i += sizeof(TYPE)) { \ - *(TYPE *)((void *)data + i) = IDENT; \ + *(TYPE *)((void *)data + i) = ident; \ } \ - return FUNC##_reduce(data, s, maxsz / sizeof(TYPE)); \ + return FUNC##_reduce(data, status, maxsz / sizeof(TYPE)); \ } \ void helper_sve2p1_##NAME##qv_##SUF(void *vd, void *vn, void *vg, \ float_status *status, uint32_t desc) \ { \ unsigned oprsz = simd_oprsz(desc), segments = oprsz / 16; \ + TYPE ident = IDENT; \ for (unsigned e = 0; e < 16; e += sizeof(TYPE)) { \ TYPE data[ARM_MAX_VQ]; \ for (unsigned s = 0; s < segments; s++) { \ uint16_t pg = *(uint16_t *)(vg + H1_2(s * 2)); \ - TYPE nn = *(TYPE *)(vn + H(s * 16 + H(e))); \ - data[s] = (pg >> e) & 1 ? nn : IDENT; \ + TYPE nn = *(TYPE *)(vn + (s * 16 + H(e))); \ + data[s] = (pg >> e) & 1 ? nn : ident; \ } \ *(TYPE *)(vd + H(e)) = FUNC##_reduce(data, status, segments); \ } \ @@ -4521,14 +4523,17 @@ DO_REDUCE(fadd,h, float16, H1_2, float16_add, float16_zero) DO_REDUCE(fadd,s, float32, H1_4, float32_add, float32_zero) DO_REDUCE(fadd,d, float64, H1_8, float64_add, float64_zero) -/* Identity is floatN_default_nan, without the function call. */ -DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, 0x7E00) -DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, 0x7FC00000) -DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL) +/* + * We can't avoid the function call for the default NaN value, because + * it changes when FPCR.AH is set. + */ +DO_REDUCE(fminnm,h, float16, H1_2, float16_minnum, float16_default_nan(status)) +DO_REDUCE(fminnm,s, float32, H1_4, float32_minnum, float32_default_nan(status)) +DO_REDUCE(fminnm,d, float64, H1_8, float64_minnum, float64_default_nan(status)) -DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, 0x7E00) -DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, 0x7FC00000) -DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL) +DO_REDUCE(fmaxnm,h, float16, H1_2, float16_maxnum, float16_default_nan(status)) +DO_REDUCE(fmaxnm,s, float32, H1_4, float32_maxnum, float32_default_nan(status)) +DO_REDUCE(fmaxnm,d, float64, H1_8, float64_maxnum, float64_default_nan(status)) DO_REDUCE(fmin,h, float16, H1_2, float16_min, float16_infinity) DO_REDUCE(fmin,s, float32, H1_4, float32_min, float32_infinity) @@ -4629,14 +4634,17 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ } while (i != 0); \ } +DO_ZPZZ_FP(sve_fadd_b16, uint16_t, H1_2, bfloat16_add) DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add) DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add) DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add) +DO_ZPZZ_FP(sve_fsub_b16, uint16_t, H1_2, bfloat16_sub) DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub) DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub) DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub) +DO_ZPZZ_FP(sve_fmul_b16, uint16_t, H1_2, bfloat16_mul) DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul) DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul) DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul) @@ -4645,26 +4653,32 @@ DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div) DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div) DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div) +DO_ZPZZ_FP(sve_fmin_b16, uint16_t, H1_2, bfloat16_min) DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min) DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min) DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min) +DO_ZPZZ_FP(sve_fmax_b16, uint16_t, H1_2, bfloat16_max) DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max) DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max) DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max) +DO_ZPZZ_FP(sve_ah_fmin_b16, uint16_t, H1_2, helper_sme2_ah_fmin_b16) DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh) DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins) DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind) +DO_ZPZZ_FP(sve_ah_fmax_b16, uint16_t, H1_2, helper_sme2_ah_fmax_b16) DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh) DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs) DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd) +DO_ZPZZ_FP(sve_fminnum_b16, uint16_t, H1_2, bfloat16_minnum) DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum) DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum) DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum) +DO_ZPZZ_FP(sve_fmaxnum_b16, uint16_t, H1_2, bfloat16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum) DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum) DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum) @@ -5090,6 +5104,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int) #undef DO_ZPZ_FP +static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg, + float_status *status, uint32_t desc, + uint16_t neg1, uint16_t neg3, int flags) +{ + intptr_t i = simd_oprsz(desc); + uint64_t *g = vg; + + do { + uint64_t pg = g[(i - 1) >> 6]; + do { + i -= 2; + if (likely((pg >> (i & 63)) & 1)) { + float16 e1, e2, e3, r; + + e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1; + e2 = *(uint16_t *)(vm + H1_2(i)); + e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3; + r = bfloat16_muladd(e1, e2, e3, flags, status); + *(uint16_t *)(vd + H1_2(i)) = r; + } + } while (i & 63); + } while (i != 0); +} + +void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0); +} + +void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0); +} + +void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0); +} + +void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0); +} + +void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product); +} + +void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_product | float_muladd_negate_c); +} + +void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va, + void *vg, float_status *status, uint32_t desc) +{ + do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, + float_muladd_negate_c); +} + static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg, float_status *status, uint32_t desc, uint16_t neg1, uint16_t neg3, int flags) |