Diffstat (limited to 'target/riscv/vector_helper.c')
-rw-r--r--  target/riscv/vector_helper.c | 317
1 file changed, 180 insertions, 137 deletions
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 2423affe37..f4d0438988 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -50,10 +50,7 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } } - if ((sew > cpu->cfg.elen) - || vill - || (ediv != 0) - || (reserved != 0)) { + if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { /* only set vill bit. */ env->vill = 1; env->vtype = 0; @@ -290,7 +287,7 @@ static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl, } /* - *** stride: access vector element from strided memory + * stride: access vector element from strided memory */ static void vext_ldst_stride(void *vd, void *v0, target_ulong base, @@ -356,10 +353,10 @@ GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) /* - *** unit-stride: access elements stored contiguously in memory + * unit-stride: access elements stored contiguously in memory */ -/* unmasked unit-stride load and store operation*/ +/* unmasked unit-stride load and store operation */ static void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, @@ -385,8 +382,8 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, } /* - * masked unit-stride load and store operation will be a special case of stride, - * stride = NF * sizeof (MTYPE) + * masked unit-stride load and store operation will be a special case of + * stride, stride = NF * sizeof (MTYPE) */ #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ @@ -432,7 +429,7 @@ GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) /* - *** unit stride mask load and store, EEW = 1 + * unit stride mask load and store, EEW = 1 */ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, CPURISCVState *env, uint32_t desc) @@ -453,7 +450,7 @@ void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, } /* - *** index: access vector element from indexed memory + * index: access vector element from indexed memory */ typedef target_ulong vext_get_index_addr(target_ulong base, uint32_t idx, void *vs2); @@ -557,7 +554,7 @@ GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) /* - *** unit-stride fault-only-fisrt load instructions + * unit-stride fault-only-fisrt load instructions */ static inline void vext_ldff(void *vd, void *v0, target_ulong base, @@ -574,7 +571,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t vma = vext_vma(desc); target_ulong addr, offset, remain; - /* probe every access*/ + /* probe every access */ for (i = env->vstart; i < env->vl; i++) { if (!vm && !vext_elem_mask(v0, i)) { continue; @@ -663,7 +660,7 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) /* - *** load and store whole register instructions + * load and store whole register instructions */ static void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, @@ -681,7 +678,8 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, /* load/store rest of elements of current segment pointed by vstart */ for (pos = off; pos < max_elems; pos++, env->vstart++) { target_ulong addr = base + ((pos + k * max_elems) << log2_esz); - ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); + ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, + ra); } k++; } @@ -736,7 
+734,7 @@ GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) /* - *** Vector Integer Arithmetic Instructions + * Vector Integer Arithmetic Instructions */ /* expand macro args before macro */ @@ -1116,7 +1114,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ \ *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ } \ - env->vstart = 0; \ + env->vstart = 0; \ /* set tail elements to 1s */ \ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ } @@ -1152,8 +1150,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1188,8 +1188,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1305,10 +1307,13 @@ GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) -/* generate the helpers for shift instructions with one vector and one scalar */ +/* + * generate the helpers for shift instructions with one vector and one scalar + */ #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ @@ -1394,8 +1399,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1457,8 +1464,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ DO_OP(s2, (ETYPE)(target_long)s1)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -1735,9 +1744,9 @@ GEN_VEXT_VX(vmulhsu_vx_d, 8) /* Vector Integer Divide Instructions */ #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) -#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ +#define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) -#define DO_REM(N, M) (unlikely(M == 0) ? N :\ +#define DO_REM(N, M) (unlikely(M == 0) ? N : \ unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) @@ -1846,7 +1855,7 @@ GEN_VEXT_VX(vwmulsu_vx_h, 4) GEN_VEXT_VX(vwmulsu_vx_w, 8) /* Vector Single-Width Integer Multiply-Add Instructions */ -#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ { \ TX1 s1 = *((T1 *)vs1 + HS1(i)); \ @@ -2077,7 +2086,7 @@ GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) /* - *** Vector Fixed-Point Arithmetic Instructions + * Vector Fixed-Point Arithmetic Instructions */ /* Vector Single-Width Saturating Add and Subtract */ @@ -2159,7 +2168,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ do_##NAME, ESZ); \ } -static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, + uint8_t b) { uint8_t res = a + b; if (res < a) { @@ -2277,7 +2287,8 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, /* generate helpers for fixed point instructions with OPIVX format */ #define GEN_VEXT_VX_RM(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ do_##NAME, ESZ); \ @@ -2302,7 +2313,8 @@ static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) return res; } -static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, + int16_t b) { int16_t res = a + b; if ((res ^ a) & (res ^ b) & INT16_MIN) { @@ -2312,7 +2324,8 @@ static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) return res; } -static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int32_t res = a + b; if ((res ^ a) & (res ^ b) & INT32_MIN) { @@ -2322,7 +2335,8 @@ static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return res; } -static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = a + b; if ((res ^ a) & (res ^ b) & INT64_MIN) { @@ -2350,7 +2364,8 @@ GEN_VEXT_VX_RM(vsadd_vx_h, 2) GEN_VEXT_VX_RM(vsadd_vx_w, 4) GEN_VEXT_VX_RM(vsadd_vx_d, 8) -static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) +static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, + uint8_t b) { uint8_t res = a - b; if (res > a) { @@ -2421,7 +2436,8 @@ static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) return res; } -static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) +static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, + int16_t b) { int16_t res = a - b; if ((res ^ a) & (a ^ b) & INT16_MIN) { @@ -2431,7 +2447,8 @@ static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) return res; } -static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int32_t res = a - b; if ((res ^ a) & (a ^ b) & INT32_MIN) { @@ -2441,7 +2458,8 @@ static inline int32_t ssub32(CPURISCVState *env, int vxrm, 
int32_t a, int32_t b) return res; } -static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = a - b; if ((res ^ a) & (a ^ b) & INT64_MIN) { @@ -2497,7 +2515,8 @@ static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) return 0; /* round-down (truncate) */ } -static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int64_t res = (int64_t)a + b; uint8_t round = get_round(vxrm, res, 1); @@ -2505,7 +2524,8 @@ static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return (res >> 1) + round; } -static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = a + b; uint8_t round = get_round(vxrm, res, 1); @@ -2570,7 +2590,8 @@ GEN_VEXT_VX_RM(vaaddu_vx_h, 2) GEN_VEXT_VX_RM(vaaddu_vx_w, 4) GEN_VEXT_VX_RM(vaaddu_vx_d, 8) -static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) +static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, + int32_t b) { int64_t res = (int64_t)a - b; uint8_t round = get_round(vxrm, res, 1); @@ -2578,7 +2599,8 @@ static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) return (res >> 1) + round; } -static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) +static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, + int64_t b) { int64_t res = (int64_t)a - b; uint8_t round = get_round(vxrm, res, 1); @@ -2651,7 +2673,7 @@ static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) res = (int16_t)a * (int16_t)b; round = get_round(vxrm, res, 7); - res = (res >> 7) + round; + res = (res >> 7) + round; if (res > INT8_MAX) { env->vxsat = 0x1; @@ -2671,7 +2693,7 @@ static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) res = (int32_t)a * (int32_t)b; round = get_round(vxrm, res, 15); - res = (res >> 15) + round; + res = (res >> 15) + round; if (res > INT16_MAX) { env->vxsat = 0x1; @@ -2691,7 +2713,7 @@ static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) res = (int64_t)a * (int64_t)b; round = get_round(vxrm, res, 31); - res = (res >> 31) + round; + res = (res >> 31) + round; if (res > INT32_MAX) { env->vxsat = 0x1; @@ -2758,7 +2780,7 @@ vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) uint8_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; return res; } static inline uint16_t @@ -2862,7 +2884,7 @@ vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) int16_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > INT8_MAX) { env->vxsat = 0x1; return INT8_MAX; @@ -2881,7 +2903,7 @@ vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) int32_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > INT16_MAX) { env->vxsat = 0x1; return INT16_MAX; @@ -2900,7 +2922,7 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) int64_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > INT32_MAX) { env->vxsat = 0x1; return INT32_MAX; @@ -2933,7 +2955,7 @@ vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, 
uint8_t b) uint16_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > UINT8_MAX) { env->vxsat = 0x1; return UINT8_MAX; @@ -2949,7 +2971,7 @@ vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) uint32_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > UINT16_MAX) { env->vxsat = 0x1; return UINT16_MAX; @@ -2965,7 +2987,7 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) uint64_t res; round = get_round(vxrm, a, shift); - res = (a >> shift) + round; + res = (a >> shift) + round; if (res > UINT32_MAX) { env->vxsat = 0x1; return UINT32_MAX; @@ -2989,7 +3011,7 @@ GEN_VEXT_VX_RM(vnclipu_wx_h, 2) GEN_VEXT_VX_RM(vnclipu_wx_w, 4) /* - *** Vector Float Point Arithmetic Instructions + * Vector Float Point Arithmetic Instructions */ /* Vector Single-Width Floating-Point Add/Subtract Instructions */ #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ @@ -3052,7 +3074,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t total_elems = \ - vext_get_total_elems(env, desc, ESZ); \ + vext_get_total_elems(env, desc, ESZ); \ uint32_t vta = vext_vta(desc); \ uint32_t vma = vext_vma(desc); \ uint32_t i; \ @@ -3118,13 +3140,13 @@ GEN_VEXT_VF(vfrsub_vf_d, 8) static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) { return float32_add(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), s); + float16_to_float32(b, true, s), s); } static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) { return float64_add(float32_to_float64(a, s), - float32_to_float64(b, s), s); + float32_to_float64(b, s), s); } @@ -3140,13 +3162,13 @@ GEN_VEXT_VF(vfwadd_vf_w, 8) static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) { return float32_sub(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), s); + float16_to_float32(b, true, s), s); } static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) { return float64_sub(float32_to_float64(a, s), - float32_to_float64(b, s), s); + float32_to_float64(b, s), s); } @@ -3250,13 +3272,13 @@ GEN_VEXT_VF(vfrdiv_vf_d, 8) static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) { return float32_mul(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), s); + float16_to_float32(b, true, s), s); } static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) { return float64_mul(float32_to_float64(a, s), - float32_to_float64(b, s), s); + float32_to_float64(b, s), s); } RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) @@ -3271,7 +3293,7 @@ GEN_VEXT_VF(vfwmul_vf_w, 8) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ - CPURISCVState *env) \ + CPURISCVState *env) \ { \ TX1 s1 = *((T1 *)vs1 + HS1(i)); \ TX2 s2 = *((T2 *)vs2 + HS2(i)); \ @@ -3303,7 +3325,7 @@ GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ - CPURISCVState *env) \ + CPURISCVState *env) \ { \ TX2 s2 = *((T2 *)vs2 + HS2(i)); \ TD d = *((TD *)vd + HD(i)); \ @@ -3319,20 +3341,20 @@ GEN_VEXT_VF(vfmacc_vf_d, 8) static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { - return float16_muladd(a, b, d, - float_muladd_negate_c | 
float_muladd_negate_product, s); + return float16_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) { - return float32_muladd(a, b, d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float32_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) { - return float64_muladd(a, b, d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float64_muladd(a, b, d, float_muladd_negate_c | + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) @@ -3434,20 +3456,20 @@ GEN_VEXT_VF(vfmadd_vf_d, 8) static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) { - return float16_muladd(d, b, a, - float_muladd_negate_c | float_muladd_negate_product, s); + return float16_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) { - return float32_muladd(d, b, a, - float_muladd_negate_c | float_muladd_negate_product, s); + return float32_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); } static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) { - return float64_muladd(d, b, a, - float_muladd_negate_c | float_muladd_negate_product, s); + return float64_muladd(d, b, a, float_muladd_negate_c | + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) @@ -3523,13 +3545,13 @@ GEN_VEXT_VF(vfnmsub_vf_d, 8) static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, 0, s); + float16_to_float32(b, true, s), d, 0, s); } static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, 0, s); + float32_to_float64(b, s), d, 0, s); } RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) @@ -3544,15 +3566,16 @@ GEN_VEXT_VF(vfwmacc_vf_w, 8) static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, - float_muladd_negate_c | float_muladd_negate_product, s); + float16_to_float32(b, true, s), d, + float_muladd_negate_c | float_muladd_negate_product, + s); } static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { - return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, - float_muladd_negate_c | float_muladd_negate_product, s); + return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), + d, float_muladd_negate_c | + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) @@ -3567,15 +3590,15 @@ GEN_VEXT_VF(vfwnmacc_vf_w, 8) static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, - float_muladd_negate_c, s); + float16_to_float32(b, true, s), d, + float_muladd_negate_c, s); } static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, - float_muladd_negate_c, s); + float32_to_float64(b, s), d, + 
float_muladd_negate_c, s); } RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) @@ -3590,15 +3613,15 @@ GEN_VEXT_VF(vfwmsac_vf_w, 8) static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), - float16_to_float32(b, true, s), d, - float_muladd_negate_product, s); + float16_to_float32(b, true, s), d, + float_muladd_negate_product, s); } static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), - float32_to_float64(b, s), d, - float_muladd_negate_product, s); + float32_to_float64(b, s), d, + float_muladd_negate_product, s); } RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) @@ -3616,9 +3639,9 @@ GEN_VEXT_VF(vfwnmsac_vf_w, 8) #define OP_UU_W uint32_t, uint32_t, uint32_t #define OP_UU_D uint64_t, uint64_t, uint64_t -#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, void *vs2, int i, \ - CPURISCVState *env) \ + CPURISCVState *env) \ { \ TX2 s2 = *((T2 *)vs2 + HS2(i)); \ *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ @@ -3626,7 +3649,7 @@ static void do_##NAME(void *vd, void *vs2, int i, \ #define GEN_VEXT_V_ENV(NAME, ESZ) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ + CPURISCVState *env, uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ @@ -3703,9 +3726,9 @@ static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) } int idx = ((exp & 1) << (precision - 1)) | - (frac >> (frac_size - precision + 1)); + (frac >> (frac_size - precision + 1)); uint64_t out_frac = (uint64_t)(lookup_table[idx]) << - (frac_size - precision); + (frac_size - precision); uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; uint64_t val = 0; @@ -3727,9 +3750,9 @@ static float16 frsqrt7_h(float16 f, float_status *s) * frsqrt7(-subnormal) = canonical NaN */ if (float16_is_signaling_nan(f, s) || - (float16_is_infinity(f) && sign) || - (float16_is_normal(f) && sign) || - (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { + (float16_is_infinity(f) && sign) || + (float16_is_normal(f) && sign) || + (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { s->float_exception_flags |= float_flag_invalid; return float16_default_nan(s); } @@ -3767,9 +3790,9 @@ static float32 frsqrt7_s(float32 f, float_status *s) * frsqrt7(-subnormal) = canonical NaN */ if (float32_is_signaling_nan(f, s) || - (float32_is_infinity(f) && sign) || - (float32_is_normal(f) && sign) || - (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { + (float32_is_infinity(f) && sign) || + (float32_is_normal(f) && sign) || + (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { s->float_exception_flags |= float_flag_invalid; return float32_default_nan(s); } @@ -3807,9 +3830,9 @@ static float64 frsqrt7_d(float64 f, float_status *s) * frsqrt7(-subnormal) = canonical NaN */ if (float64_is_signaling_nan(f, s) || - (float64_is_infinity(f) && sign) || - (float64_is_normal(f) && sign) || - (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { + (float64_is_infinity(f) && sign) || + (float64_is_normal(f) && sign) || + (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { s->float_exception_flags |= float_flag_invalid; return float64_default_nan(s); } @@ -3897,18 +3920,18 @@ static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 
((s->float_rounding_mode == float_round_up) && sign)) { /* Return greatest/negative finite value. */ return (sign << (exp_size + frac_size)) | - (MAKE_64BIT_MASK(frac_size, exp_size) - 1); + (MAKE_64BIT_MASK(frac_size, exp_size) - 1); } else { /* Return +-inf. */ return (sign << (exp_size + frac_size)) | - MAKE_64BIT_MASK(frac_size, exp_size); + MAKE_64BIT_MASK(frac_size, exp_size); } } } int idx = frac >> (frac_size - precision); uint64_t out_frac = (uint64_t)(lookup_table[idx]) << - (frac_size - precision); + (frac_size - precision); uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; if (out_exp == 0 || out_exp == UINT64_MAX) { @@ -4171,8 +4194,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ DO_OP(s2, s1, &env->fp_status)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -4208,8 +4233,10 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail-agnostic */ \ - /* set tail elements to 1s */ \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -4422,8 +4449,8 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ \ for (i = env->vstart; i < vl; i++) { \ ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ - *((ETYPE *)vd + H(i)) \ - = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ + *((ETYPE *)vd + H(i)) = \ + (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ } \ env->vstart = 0; \ /* set tail elements to 1s */ \ @@ -4472,7 +4499,9 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) #define WOP_UU_B uint16_t, uint8_t, uint8_t #define WOP_UU_H uint32_t, uint16_t, uint16_t #define WOP_UU_W uint64_t, uint32_t, uint32_t -/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ +/* + * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. + */ RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) @@ -4484,7 +4513,9 @@ RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) -/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ +/* + * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. + */ RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) @@ -4501,8 +4532,7 @@ GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) /* - * vfwcvt.f.f.v vd, vs2, vm - * Convert single-width float to double-width float. + * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. */ static uint32_t vfwcvtffv16(uint16_t a, float_status *s) { @@ -4535,7 +4565,9 @@ GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) -/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ +/* + * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 
+ */ RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) @@ -4559,12 +4591,13 @@ GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) /* - *** Vector Reduction Operations + * Vector Reduction Operations */ /* Vector Single-Width Integer Reduction Instructions */ #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ @@ -4684,14 +4717,20 @@ GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) /* Maximum value */ -GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) -GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) -GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) +GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, + float16_maximum_number) +GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, + float32_maximum_number) +GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, + float64_maximum_number) /* Minimum value */ -GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) -GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) -GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) +GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, + float16_minimum_number) +GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, + float32_minimum_number) +GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, + float64_minimum_number) /* Vector Widening Floating-Point Add Instructions */ static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) @@ -4712,7 +4751,7 @@ GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) /* - *** Vector Mask Operations + * Vector Mask Operations */ /* Vector Mask-Register Logical Instructions */ #define GEN_VEXT_MASK_VV(NAME, OP) \ @@ -4732,10 +4771,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ vext_set_elem_mask(vd, i, OP(b, a)); \ } \ env->vstart = 0; \ - /* mask destination register are always tail- \ - * agnostic \ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s */ \ - /* set tail elements to 1s */ \ if (vta_all_1s) { \ for (; i < total_elems; i++) { \ vext_set_elem_mask(vd, i, 1); \ @@ -4778,7 +4817,7 @@ target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, return cnt; } -/* vfirst find-first-set mask bit*/ +/* vfirst find-first-set mask bit */ target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, uint32_t desc) { @@ -4843,8 +4882,10 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, } } env->vstart = 0; - /* mask destination register are always tail-agnostic */ - /* set tail elements to 1s */ + /* + * mask destination register are always tail-agnostic + * set tail elements to 1s + */ if (vta_all_1s) { for (; i < total_elems; i++) { vext_set_elem_mask(vd, i, 1); @@ -4936,7 +4977,7 @@ GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) /* - *** Vector Permutation Instructions + * Vector Permutation Instructions */ /* Vector Slide 
Instructions */ @@ -5013,7 +5054,8 @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ @@ -5061,7 +5103,8 @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ - void *vs2, CPURISCVState *env, uint32_t desc) \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ { \ typedef uint##BITWIDTH##_t ETYPE; \ uint32_t vm = vext_vm(desc); \ |
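For reference, the DO_DIV and DO_REM hunk in this diff only re-wraps the macro line continuations; the semantics they encode follow the RISC-V integer divide rules (quotient of all ones and remainder equal to the dividend on division by zero, and a wrapped quotient with zero remainder for INT_MIN / -1). The N == -N test in the macro is true only for zero and the minimum representable value, which is how it detects the overflow case without a width-specific constant. The sketch below restates those edge cases as plain functions; it is an illustration only, not QEMU code, and the names div_rvv/rem_rvv are invented for this example.

#include <stdint.h>
#include <stdio.h>

/* Signed divide with RISC-V edge-case semantics (illustrative sketch only). */
static int32_t div_rvv(int32_t n, int32_t m)
{
    if (m == 0) {
        return -1;                   /* x / 0 -> all ones */
    }
    if (n == INT32_MIN && m == -1) {
        return n;                    /* overflow: quotient stays INT32_MIN */
    }
    return n / m;
}

/* Signed remainder with RISC-V edge-case semantics (illustrative sketch only). */
static int32_t rem_rvv(int32_t n, int32_t m)
{
    if (m == 0) {
        return n;                    /* x % 0 -> dividend */
    }
    if (n == INT32_MIN && m == -1) {
        return 0;                    /* overflow: remainder is 0 */
    }
    return n % m;
}

int main(void)
{
    printf("%d %d\n", div_rvv(7, 0), rem_rvv(7, 0));                   /* -1 7 */
    printf("%d %d\n", div_rvv(INT32_MIN, -1), rem_rvv(INT32_MIN, -1)); /* -2147483648 0 */
    return 0;
}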