diff options
Diffstat (limited to 'target/arm/tcg/translate-a64.c')
| -rw-r--r-- | target/arm/tcg/translate-a64.c | 782 |
1 files changed, 640 insertions, 142 deletions
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index d6ac2ed418..8bef391bb0 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -654,6 +654,68 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) write_fp_dreg(s, reg, tmp); } +/* + * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: + * - if FPCR.NEP == 0, clear the high elements of reg + * - if FPCR.NEP == 1, set the high elements of reg from mergereg + * (i.e. merge the result with those high elements) + * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). + */ +static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, + TCGv_i64 v) +{ + if (!s->fpcr_nep) { + write_fp_dreg(s, reg, v); + return; + } + + /* + * Move from mergereg to reg; this sets the high elements and + * clears the bits above 128 as a side effect. + */ + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), + vec_full_reg_offset(s, mergereg), + 16, vec_full_reg_size(s)); + tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); +} + +/* + * Write a single-prec result, but only clear the higher elements + * of the destination register if FPCR.NEP is 0; otherwise preserve them. + */ +static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, + TCGv_i32 v) +{ + if (!s->fpcr_nep) { + write_fp_sreg(s, reg, v); + return; + } + + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), + vec_full_reg_offset(s, mergereg), + 16, vec_full_reg_size(s)); + tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); +} + +/* + * Write a half-prec result, but only clear the higher elements + * of the destination register if FPCR.NEP is 0; otherwise preserve them. + * The caller must ensure that the top 16 bits of v are zero. + */ +static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, + TCGv_i32 v) +{ + if (!s->fpcr_nep) { + write_fp_sreg(s, reg, v); + return; + } + + tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), + vec_full_reg_offset(s, mergereg), + 16, vec_full_reg_size(s)); + tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); +} + /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, GVecGen2Fn *gvec_fn, int vece) @@ -712,10 +774,10 @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, * an out-of-line helper. */ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, - int rm, bool is_fp16, int data, + int rm, ARMFPStatusFlavour fpsttype, int data, gen_helper_gvec_3_ptr *fn) { - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); + TCGv_ptr fpst = fpstatus_ptr(fpsttype); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), fpst, @@ -754,10 +816,11 @@ static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, * an out-of-line helper. */ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, - int rm, int ra, bool is_fp16, int data, + int rm, int ra, ARMFPStatusFlavour fpsttype, + int data, gen_helper_gvec_4_ptr *fn) { - TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); + TCGv_ptr fpst = fpstatus_ptr(fpsttype); tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), @@ -765,6 +828,111 @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } +/* + * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. + * These functions implement + * d = floatN_is_any_nan(s) ? s : floatN_chs(s) + * which for float32 is + * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) + * and similarly for the other float sizes. + */ +static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); + + gen_vfp_negh(chs_s, s); + gen_vfp_absh(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7c00), + s, chs_s); +} + +static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); + + gen_vfp_negs(chs_s, s); + gen_vfp_abss(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7f800000UL), + s, chs_s); +} + +static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) +{ + TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); + + gen_vfp_negd(chs_s, s); + gen_vfp_absd(abs_s, s); + tcg_gen_movcond_i64(TCG_COND_GTU, d, + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), + s, chs_s); +} + +/* + * These functions implement + * d = floatN_is_any_nan(s) ? s : floatN_abs(s) + * which for float32 is + * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) + * and similarly for the other float sizes. + */ +static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(); + + gen_vfp_absh(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7c00), + s, abs_s); +} + +static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) +{ + TCGv_i32 abs_s = tcg_temp_new_i32(); + + gen_vfp_abss(abs_s, s); + tcg_gen_movcond_i32(TCG_COND_GTU, d, + abs_s, tcg_constant_i32(0x7f800000UL), + s, abs_s); +} + +static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) +{ + TCGv_i64 abs_s = tcg_temp_new_i64(); + + gen_vfp_absd(abs_s, s); + tcg_gen_movcond_i64(TCG_COND_GTU, d, + abs_s, tcg_constant_i64(0x7ff0000000000000ULL), + s, abs_s); +} + +static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) +{ + if (dc->fpcr_ah) { + gen_vfp_ah_negh(d, s); + } else { + gen_vfp_negh(d, s); + } +} + +static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) +{ + if (dc->fpcr_ah) { + gen_vfp_ah_negs(d, s); + } else { + gen_vfp_negs(d, s); + } +} + +static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) +{ + if (dc->fpcr_ah) { + gen_vfp_ah_negd(d, s); + } else { + gen_vfp_negd(d, s); + } +} + /* Set ZF and NF based on a 64 bit result. This is alas fiddlier * than the 32 bit equivalent. */ @@ -5025,23 +5193,25 @@ typedef struct FPScalar { void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); } FPScalar; -static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) +static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, + const FPScalar *f, int mergereg, + ARMFPStatusFlavour fpsttype) { switch (a->esz) { case MO_64: if (fp_access_check(s)) { TCGv_i64 t0 = read_fp_dreg(s, a->rn); TCGv_i64 t1 = read_fp_dreg(s, a->rm); - f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); - write_fp_dreg(s, a->rd, t0); + f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); + write_fp_dreg_merging(s, a->rd, mergereg, t0); } break; case MO_32: if (fp_access_check(s)) { TCGv_i32 t0 = read_fp_sreg(s, a->rn); TCGv_i32 t1 = read_fp_sreg(s, a->rm); - f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); - write_fp_sreg(s, a->rd, t0); + f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); + write_fp_sreg_merging(s, a->rd, mergereg, t0); } break; case MO_16: @@ -5051,8 +5221,8 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) if (fp_access_check(s)) { TCGv_i32 t0 = read_fp_hreg(s, a->rn); TCGv_i32 t1 = read_fp_hreg(s, a->rm); - f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); - write_fp_sreg(s, a->rd, t0); + f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); + write_fp_hreg_merging(s, a->rd, mergereg, t0); } break; default: @@ -5061,68 +5231,103 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) return true; } +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, + int mergereg) +{ + return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, + a->esz == MO_16 ? + FPST_A64_F16 : FPST_A64); +} + +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, + const FPScalar *fnormal, const FPScalar *fah, + int mergereg) +{ + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, + mergereg, select_ah_fpst(s, a->esz)); +} + +/* Some insns need to call different helpers when FPCR.AH == 1 */ +static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, + const FPScalar *fnormal, + const FPScalar *fah, + int mergereg) +{ + return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); +} + static const FPScalar f_scalar_fadd = { gen_helper_vfp_addh, gen_helper_vfp_adds, gen_helper_vfp_addd, }; -TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) static const FPScalar f_scalar_fsub = { gen_helper_vfp_subh, gen_helper_vfp_subs, gen_helper_vfp_subd, }; -TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) static const FPScalar f_scalar_fdiv = { gen_helper_vfp_divh, gen_helper_vfp_divs, gen_helper_vfp_divd, }; -TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) static const FPScalar f_scalar_fmul = { gen_helper_vfp_mulh, gen_helper_vfp_muls, gen_helper_vfp_muld, }; -TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) static const FPScalar f_scalar_fmax = { gen_helper_vfp_maxh, gen_helper_vfp_maxs, gen_helper_vfp_maxd, }; -TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) +static const FPScalar f_scalar_fmax_ah = { + gen_helper_vfp_ah_maxh, + gen_helper_vfp_ah_maxs, + gen_helper_vfp_ah_maxd, +}; +TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) static const FPScalar f_scalar_fmin = { gen_helper_vfp_minh, gen_helper_vfp_mins, gen_helper_vfp_mind, }; -TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) +static const FPScalar f_scalar_fmin_ah = { + gen_helper_vfp_ah_minh, + gen_helper_vfp_ah_mins, + gen_helper_vfp_ah_mind, +}; +TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) static const FPScalar f_scalar_fmaxnm = { gen_helper_vfp_maxnumh, gen_helper_vfp_maxnums, gen_helper_vfp_maxnumd, }; -TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) static const FPScalar f_scalar_fminnm = { gen_helper_vfp_minnumh, gen_helper_vfp_minnums, gen_helper_vfp_minnumd, }; -TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) +TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) static const FPScalar f_scalar_fmulx = { gen_helper_advsimd_mulxh, gen_helper_vfp_mulxs, gen_helper_vfp_mulxd, }; -TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) { @@ -5142,47 +5347,70 @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) gen_vfp_negd(d, d); } +static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_mulh(d, n, m, s); + gen_vfp_ah_negh(d, d); +} + +static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_muls(d, n, m, s); + gen_vfp_ah_negs(d, d); +} + +static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_muld(d, n, m, s); + gen_vfp_ah_negd(d, d); +} + static const FPScalar f_scalar_fnmul = { gen_fnmul_h, gen_fnmul_s, gen_fnmul_d, }; -TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) +static const FPScalar f_scalar_ah_fnmul = { + gen_fnmul_ah_h, + gen_fnmul_ah_s, + gen_fnmul_ah_d, +}; +TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) static const FPScalar f_scalar_fcmeq = { gen_helper_advsimd_ceq_f16, gen_helper_neon_ceq_f32, gen_helper_neon_ceq_f64, }; -TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) static const FPScalar f_scalar_fcmge = { gen_helper_advsimd_cge_f16, gen_helper_neon_cge_f32, gen_helper_neon_cge_f64, }; -TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) static const FPScalar f_scalar_fcmgt = { gen_helper_advsimd_cgt_f16, gen_helper_neon_cgt_f32, gen_helper_neon_cgt_f64, }; -TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) static const FPScalar f_scalar_facge = { gen_helper_advsimd_acge_f16, gen_helper_neon_acge_f32, gen_helper_neon_acge_f64, }; -TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) static const FPScalar f_scalar_facgt = { gen_helper_advsimd_acgt_f16, gen_helper_neon_acgt_f32, gen_helper_neon_acgt_f64, }; -TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) { @@ -5202,26 +5430,61 @@ static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) gen_vfp_absd(d, d); } +static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subh(d, n, m, s); + gen_vfp_ah_absh(d, d); +} + +static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subs(d, n, m, s); + gen_vfp_ah_abss(d, d); +} + +static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_subd(d, n, m, s); + gen_vfp_ah_absd(d, d); +} + static const FPScalar f_scalar_fabd = { gen_fabd_h, gen_fabd_s, gen_fabd_d, }; -TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) +static const FPScalar f_scalar_ah_fabd = { + gen_fabd_ah_h, + gen_fabd_ah_s, + gen_fabd_ah_d, +}; +TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) static const FPScalar f_scalar_frecps = { gen_helper_recpsf_f16, gen_helper_recpsf_f32, gen_helper_recpsf_f64, }; -TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) +static const FPScalar f_scalar_ah_frecps = { + gen_helper_recpsf_ah_f16, + gen_helper_recpsf_ah_f32, + gen_helper_recpsf_ah_f64, +}; +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) static const FPScalar f_scalar_frsqrts = { gen_helper_rsqrtsf_f16, gen_helper_rsqrtsf_f32, gen_helper_rsqrtsf_f64, }; -TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) +static const FPScalar f_scalar_ah_frsqrts = { + gen_helper_rsqrtsf_ah_f16, + gen_helper_rsqrtsf_ah_f32, + gen_helper_rsqrtsf_ah_f64, +}; +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, const FPScalar *f, bool swap) @@ -5472,8 +5735,10 @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) -static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, - gen_helper_gvec_3_ptr * const fns[3]) +static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, + int data, + gen_helper_gvec_3_ptr * const fns[3], + ARMFPStatusFlavour fpsttype) { MemOp esz = a->esz; int check = fp_access_check_vector_hsd(s, a->q, esz); @@ -5482,11 +5747,34 @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, return check == 0; } - gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, - esz == MO_16, data, fns[esz - 1]); + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, + data, fns[esz - 1]); return true; } +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3_ptr * const fns[3]) +{ + return do_fp3_vector_with_fpsttype(s, a, data, fns, + a->esz == MO_16 ? + FPST_A64_F16 : FPST_A64); +} + +static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3_ptr * const fnormal[3], + gen_helper_gvec_3_ptr * const fah[3]) +{ + return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); +} + +static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3_ptr * const fnormal[3], + gen_helper_gvec_3_ptr * const fah[3]) +{ + return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal, + select_ah_fpst(s, a->esz)); +} + static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { gen_helper_gvec_fadd_h, gen_helper_gvec_fadd_s, @@ -5520,14 +5808,24 @@ static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_d, }; -TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) +static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { + gen_helper_gvec_ah_fmax_h, + gen_helper_gvec_ah_fmax_s, + gen_helper_gvec_ah_fmax_d, +}; +TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { gen_helper_gvec_fmin_h, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_d, }; -TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) +static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { + gen_helper_gvec_ah_fmin_h, + gen_helper_gvec_ah_fmin_s, + gen_helper_gvec_ah_fmin_d, +}; +TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { gen_helper_gvec_fmaxnum_h, @@ -5562,7 +5860,12 @@ static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_d, }; -TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) +static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { + gen_helper_gvec_ah_vfms_h, + gen_helper_gvec_ah_vfms_s, + gen_helper_gvec_ah_vfms_d, +}; +TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { gen_helper_gvec_fceq_h, @@ -5604,21 +5907,36 @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_d, }; -TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) +static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { + gen_helper_gvec_ah_fabd_h, + gen_helper_gvec_ah_fabd_s, + gen_helper_gvec_ah_fabd_d, +}; +TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { gen_helper_gvec_recps_h, gen_helper_gvec_recps_s, gen_helper_gvec_recps_d, }; -TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) +static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { + gen_helper_gvec_ah_recps_h, + gen_helper_gvec_ah_recps_s, + gen_helper_gvec_ah_recps_d, +}; +TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { gen_helper_gvec_rsqrts_h, gen_helper_gvec_rsqrts_s, gen_helper_gvec_rsqrts_d, }; -TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) +static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { + gen_helper_gvec_ah_rsqrts_h, + gen_helper_gvec_ah_rsqrts_s, + gen_helper_gvec_ah_rsqrts_d, +}; +TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { gen_helper_gvec_faddp_h, @@ -5632,14 +5950,24 @@ static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_d, }; -TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) +static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { + gen_helper_gvec_ah_fmaxp_h, + gen_helper_gvec_ah_fmaxp_s, + gen_helper_gvec_ah_fmaxp_d, +}; +TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { gen_helper_gvec_fminp_h, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_d, }; -TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) +static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { + gen_helper_gvec_ah_fminp_h, + gen_helper_gvec_ah_fminp_s, + gen_helper_gvec_ah_fminp_d, +}; +TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { gen_helper_gvec_fmaxnump_h, @@ -5795,7 +6123,8 @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) } if (fp_access_check(s)) { /* Q bit selects BFMLALB vs BFMLALT. */ - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, + s->fpcr_ah ? FPST_AH : FPST_A64, a->q, gen_helper_gvec_bfmlal); } return true; @@ -5806,8 +6135,14 @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { gen_helper_gvec_fcadds, gen_helper_gvec_fcaddd, }; -TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) -TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) +/* + * Encode FPCR.AH into the data so the helper knows whether the + * negations it does should avoid flipping the sign bit on a NaN + */ +TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), + f_vector_fcadd) +TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), + f_vector_fcadd) static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) { @@ -5828,7 +6163,8 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) } gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, - a->esz == MO_16, a->rot, fn[a->esz]); + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, + a->rot | (s->fpcr_ah << 2), fn[a->esz]); return true; } @@ -6197,7 +6533,7 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) read_vec_element(s, t1, a->rm, a->idx, MO_64); f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); - write_fp_dreg(s, a->rd, t0); + write_fp_dreg_merging(s, a->rd, a->rn, t0); } break; case MO_32: @@ -6207,7 +6543,7 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); - write_fp_sreg(s, a->rd, t0); + write_fp_sreg_merging(s, a->rd, a->rn, t0); } break; case MO_16: @@ -6220,7 +6556,7 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); - write_fp_sreg(s, a->rd, t0); + write_fp_hreg_merging(s, a->rd, a->rn, t0); } break; default: @@ -6243,10 +6579,10 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) read_vec_element(s, t2, a->rm, a->idx, MO_64); if (neg) { - gen_vfp_negd(t1, t1); + gen_vfp_maybe_ah_negd(s, t1, t1); } gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); - write_fp_dreg(s, a->rd, t0); + write_fp_dreg_merging(s, a->rd, a->rd, t0); } break; case MO_32: @@ -6257,10 +6593,10 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); if (neg) { - gen_vfp_negs(t1, t1); + gen_vfp_maybe_ah_negs(s, t1, t1); } gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); - write_fp_sreg(s, a->rd, t0); + write_fp_sreg_merging(s, a->rd, a->rd, t0); } break; case MO_16: @@ -6274,11 +6610,11 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); if (neg) { - gen_vfp_negh(t1, t1); + gen_vfp_maybe_ah_negh(s, t1, t1); } gen_helper_advsimd_muladdh(t0, t1, t2, t0, fpstatus_ptr(FPST_A64_F16)); - write_fp_sreg(s, a->rd, t0); + write_fp_hreg_merging(s, a->rd, a->rd, t0); } break; default: @@ -6374,7 +6710,8 @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, } gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, - esz == MO_16, a->idx, fns[esz - 1]); + esz == MO_16 ? FPST_A64_F16 : FPST_A64, + a->idx, fns[esz - 1]); return true; } @@ -6394,10 +6731,16 @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) { - static gen_helper_gvec_4_ptr * const fns[3] = { - gen_helper_gvec_fmla_idx_h, - gen_helper_gvec_fmla_idx_s, - gen_helper_gvec_fmla_idx_d, + static gen_helper_gvec_4_ptr * const fns[3][3] = { + { gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_fmla_idx_s, + gen_helper_gvec_fmla_idx_d }, + { gen_helper_gvec_fmls_idx_h, + gen_helper_gvec_fmls_idx_s, + gen_helper_gvec_fmls_idx_d }, + { gen_helper_gvec_ah_fmls_idx_h, + gen_helper_gvec_ah_fmls_idx_s, + gen_helper_gvec_ah_fmls_idx_d }, }; MemOp esz = a->esz; int check = fp_access_check_vector_hsd(s, a->q, esz); @@ -6407,8 +6750,8 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) } gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, - esz == MO_16, (a->idx << 1) | neg, - fns[esz - 1]); + esz == MO_16 ? FPST_A64_F16 : FPST_A64, + a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); return true; } @@ -6542,7 +6885,8 @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) } if (fp_access_check(s)) { /* Q bit selects BFMLALB vs BFMLALT. */ - gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, + gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, + s->fpcr_ah ? FPST_AH : FPST_A64, (a->idx << 1) | a->q, gen_helper_gvec_bfmlal_idx); } @@ -6571,7 +6915,8 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) } if (fp_access_check(s)) { gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, - a->esz == MO_16, (a->idx << 2) | a->rot, fn); + a->esz == MO_16 ? FPST_A64_F16 : FPST_A64, + (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); } return true; } @@ -6625,9 +6970,16 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) return true; } +static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, + const FPScalar *fnormal, + const FPScalar *fah) +{ + return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); +} + TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) -TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) -TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) +TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) +TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) @@ -6746,14 +7098,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) TCGv_i64 ta = read_fp_dreg(s, a->ra); if (neg_a) { - gen_vfp_negd(ta, ta); + gen_vfp_maybe_ah_negd(s, ta, ta); } if (neg_n) { - gen_vfp_negd(tn, tn); + gen_vfp_maybe_ah_negd(s, tn, tn); } fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); - write_fp_dreg(s, a->rd, ta); + write_fp_dreg_merging(s, a->rd, a->ra, ta); } break; @@ -6764,14 +7116,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) TCGv_i32 ta = read_fp_sreg(s, a->ra); if (neg_a) { - gen_vfp_negs(ta, ta); + gen_vfp_maybe_ah_negs(s, ta, ta); } if (neg_n) { - gen_vfp_negs(tn, tn); + gen_vfp_maybe_ah_negs(s, tn, tn); } fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); - write_fp_sreg(s, a->rd, ta); + write_fp_sreg_merging(s, a->rd, a->ra, ta); } break; @@ -6785,14 +7137,14 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) TCGv_i32 ta = read_fp_hreg(s, a->ra); if (neg_a) { - gen_vfp_negh(ta, ta); + gen_vfp_maybe_ah_negh(s, ta, ta); } if (neg_n) { - gen_vfp_negh(tn, tn); + gen_vfp_maybe_ah_negh(s, tn, tn); } fpst = fpstatus_ptr(FPST_A64_F16); gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); - write_fp_sreg(s, a->rd, ta); + write_fp_hreg_merging(s, a->rd, a->ra, ta); } break; @@ -6879,27 +7231,35 @@ static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, } static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, - NeonGenTwoSingleOpFn *fn) + NeonGenTwoSingleOpFn *fnormal, + NeonGenTwoSingleOpFn *fah) { if (fp_access_check(s)) { MemOp esz = a->esz; int elts = (a->q ? 16 : 8) >> esz; TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); - TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); + TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, + s->fpcr_ah ? fah : fnormal); write_fp_sreg(s, a->rd, res); } return true; } -TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) -TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) -TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) -TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) +TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) +TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) +TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) +TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, + gen_helper_vfp_minh, gen_helper_vfp_ah_minh) -TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) -TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) -TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) -TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) +TRANS(FMAXNMV_s, do_fp_reduction, a, + gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) +TRANS(FMINNMV_s, do_fp_reduction, a, + gen_helper_vfp_minnums, gen_helper_vfp_minnums) +TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) +TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) /* * Floating-point Immediate @@ -8323,21 +8683,30 @@ typedef struct FPScalar1Int { } FPScalar1Int; static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, - const FPScalar1Int *f) + const FPScalar1Int *f, + bool merging) { switch (a->esz) { case MO_64: if (fp_access_check(s)) { TCGv_i64 t = read_fp_dreg(s, a->rn); f->gen_d(t, t); - write_fp_dreg(s, a->rd, t); + if (merging) { + write_fp_dreg_merging(s, a->rd, a->rd, t); + } else { + write_fp_dreg(s, a->rd, t); + } } break; case MO_32: if (fp_access_check(s)) { TCGv_i32 t = read_fp_sreg(s, a->rn); f->gen_s(t, t); - write_fp_sreg(s, a->rd, t); + if (merging) { + write_fp_sreg_merging(s, a->rd, a->rd, t); + } else { + write_fp_sreg(s, a->rd, t); + } } break; case MO_16: @@ -8347,7 +8716,11 @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, if (fp_access_check(s)) { TCGv_i32 t = read_fp_hreg(s, a->rn); f->gen_h(t, t); - write_fp_sreg(s, a->rd, t); + if (merging) { + write_fp_hreg_merging(s, a->rd, a->rd, t); + } else { + write_fp_sreg(s, a->rd, t); + } } break; default: @@ -8356,26 +8729,43 @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, return true; } +static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, + const FPScalar1Int *fnormal, + const FPScalar1Int *fah) +{ + return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true); +} + static const FPScalar1Int f_scalar_fmov = { tcg_gen_mov_i32, tcg_gen_mov_i32, tcg_gen_mov_i64, }; -TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) static const FPScalar1Int f_scalar_fabs = { gen_vfp_absh, gen_vfp_abss, gen_vfp_absd, }; -TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) +static const FPScalar1Int f_scalar_ah_fabs = { + gen_vfp_ah_absh, + gen_vfp_ah_abss, + gen_vfp_ah_absd, +}; +TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) static const FPScalar1Int f_scalar_fneg = { gen_vfp_negh, gen_vfp_negs, gen_vfp_negd, }; -TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) +static const FPScalar1Int f_scalar_ah_fneg = { + gen_vfp_ah_negh, + gen_vfp_ah_negs, + gen_vfp_ah_negd, +}; +TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) typedef struct FPScalar1 { void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); @@ -8383,8 +8773,9 @@ typedef struct FPScalar1 { void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); } FPScalar1; -static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, - const FPScalar1 *f, int rmode) +static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, + const FPScalar1 *f, int rmode, + ARMFPStatusFlavour fpsttype) { TCGv_i32 tcg_rmode = NULL; TCGv_ptr fpst; @@ -8396,7 +8787,7 @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, return check == 0; } - fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); + fpst = fpstatus_ptr(fpsttype); if (rmode >= 0) { tcg_rmode = gen_set_rmode(rmode, fpst); } @@ -8405,17 +8796,17 @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, case MO_64: t64 = read_fp_dreg(s, a->rn); f->gen_d(t64, t64, fpst); - write_fp_dreg(s, a->rd, t64); + write_fp_dreg_merging(s, a->rd, a->rd, t64); break; case MO_32: t32 = read_fp_sreg(s, a->rn); f->gen_s(t32, t32, fpst); - write_fp_sreg(s, a->rd, t32); + write_fp_sreg_merging(s, a->rd, a->rd, t32); break; case MO_16: t32 = read_fp_hreg(s, a->rn); f->gen_h(t32, t32, fpst); - write_fp_sreg(s, a->rd, t32); + write_fp_hreg_merging(s, a->rd, a->rd, t32); break; default: g_assert_not_reached(); @@ -8427,6 +8818,20 @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, return true; } +static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, + const FPScalar1 *f, int rmode) +{ + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, + a->esz == MO_16 ? + FPST_A64_F16 : FPST_A64); +} + +static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, + const FPScalar1 *f, int rmode) +{ + return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); +} + static const FPScalar1 f_scalar_fsqrt = { gen_helper_vfp_sqrth, gen_helper_vfp_sqrts, @@ -8453,10 +8858,27 @@ static const FPScalar1 f_scalar_frintx = { }; TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) -static const FPScalar1 f_scalar_bfcvt = { - .gen_s = gen_helper_bfcvt, -}; -TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) +static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) +{ + ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; + TCGv_i32 t32; + int check; + + if (!dc_isar_feature(aa64_bf16, s)) { + return false; + } + + check = fp_access_check_scalar_hsd(s, a->esz); + + if (check <= 0) { + return check == 0; + } + + t32 = read_fp_sreg(s, a->rn); + gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); + write_fp_hreg_merging(s, a->rd, a->rd, t32); + return true; +} static const FPScalar1 f_scalar_frint32 = { NULL, @@ -8481,21 +8903,35 @@ static const FPScalar1 f_scalar_frecpe = { gen_helper_recpe_f32, gen_helper_recpe_f64, }; -TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) +static const FPScalar1 f_scalar_frecpe_rpres = { + gen_helper_recpe_f16, + gen_helper_recpe_rpres_f32, + gen_helper_recpe_f64, +}; +TRANS(FRECPE_s, do_fp1_scalar_ah, a, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? + &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) static const FPScalar1 f_scalar_frecpx = { gen_helper_frecpx_f16, gen_helper_frecpx_f32, gen_helper_frecpx_f64, }; -TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) +TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) static const FPScalar1 f_scalar_frsqrte = { gen_helper_rsqrte_f16, gen_helper_rsqrte_f32, gen_helper_rsqrte_f64, }; -TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) +static const FPScalar1 f_scalar_frsqrte_rpres = { + gen_helper_rsqrte_f16, + gen_helper_rsqrte_rpres_f32, + gen_helper_rsqrte_f64, +}; +TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? + &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) { @@ -8505,7 +8941,7 @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) TCGv_ptr fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); - write_fp_dreg(s, a->rd, tcg_rd); + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); } return true; } @@ -8518,8 +8954,8 @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) TCGv_ptr fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); - /* write_fp_sreg is OK here because top half of result is zero */ - write_fp_sreg(s, a->rd, tmp); + /* write_fp_hreg_merging is OK here because top half of result is zero */ + write_fp_hreg_merging(s, a->rd, a->rd, tmp); } return true; } @@ -8532,7 +8968,7 @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) TCGv_ptr fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); - write_fp_sreg(s, a->rd, tcg_rd); + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); } return true; } @@ -8546,8 +8982,8 @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) TCGv_ptr fpst = fpstatus_ptr(FPST_A64); gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); - /* write_fp_sreg is OK here because top half of tcg_rd is zero */ - write_fp_sreg(s, a->rd, tcg_rd); + /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ + write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); } return true; } @@ -8561,7 +8997,7 @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) TCGv_i32 tcg_ahp = get_ahp_flag(); gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); - write_fp_sreg(s, a->rd, tcg_rd); + write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); } return true; } @@ -8575,7 +9011,7 @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) TCGv_i32 tcg_ahp = get_ahp_flag(); gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); - write_fp_dreg(s, a->rd, tcg_rd); + write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); } return true; } @@ -8598,7 +9034,7 @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, } else { gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); } - write_fp_dreg(s, rd, tcg_double); + write_fp_dreg_merging(s, rd, rd, tcg_double); break; case MO_32: @@ -8608,7 +9044,7 @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, } else { gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); } - write_fp_sreg(s, rd, tcg_single); + write_fp_sreg_merging(s, rd, rd, tcg_single); break; case MO_16: @@ -8618,7 +9054,7 @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, } else { gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); } - write_fp_sreg(s, rd, tcg_single); + write_fp_hreg_merging(s, rd, rd, tcg_single); break; default: @@ -8823,7 +9259,9 @@ static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), a->esz, tcg_int, a->shift, a->rn, rmode); - clear_vec(s, a->rd); + if (!s->fpcr_nep) { + clear_vec(s, a->rd); + } write_vec_element(s, tcg_int, a->rd, 0, a->esz); return true; } @@ -9097,24 +9535,21 @@ static ArithOneOp * const f_scalar_uqxtn[] = { }; TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) -static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) +static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) { - /* - * 64 bit to 32 bit float conversion - * with von Neumann rounding (round to odd) - */ - TCGv_i32 tmp = tcg_temp_new_i32(); - gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); - tcg_gen_extu_i32_i64(d, tmp); + if (fp_access_check(s)) { + /* + * 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) + */ + TCGv_i64 src = read_fp_dreg(s, a->rn); + TCGv_i32 dst = tcg_temp_new_i32(); + gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); + write_fp_sreg_merging(s, a->rd, a->rd, dst); + } + return true; } -static ArithOneOp * const f_scalar_fcvtxn[] = { - NULL, - NULL, - gen_fcvtxn_sd, -}; -TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) - #undef WRAP_ENV static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) @@ -9216,11 +9651,27 @@ static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) tcg_gen_extu_i32_i64(d, tmp); } +static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) +{ + /* + * 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) + */ + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); + tcg_gen_extu_i32_i64(d, tmp); +} + static ArithOneOp * const f_vector_fcvtn[] = { NULL, gen_fcvtn_hs, gen_fcvtn_sd, }; +static ArithOneOp * const f_scalar_fcvtxn[] = { + NULL, + NULL, + gen_fcvtxn_sd, +}; TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) @@ -9232,12 +9683,27 @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) tcg_gen_extu_i32_i64(d, tmp); } -static ArithOneOp * const f_vector_bfcvtn[] = { - NULL, - gen_bfcvtn_hs, - NULL, +static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) +{ + TCGv_ptr fpst = fpstatus_ptr(FPST_AH); + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_bfcvt_pair(tmp, n, fpst); + tcg_gen_extu_i32_i64(d, tmp); +} + +static ArithOneOp * const f_vector_bfcvtn[2][3] = { + { + NULL, + gen_bfcvtn_hs, + NULL, + }, { + NULL, + gen_bfcvtn_ah_hs, + NULL, + } }; -TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) +TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, + f_vector_bfcvtn[s->fpcr_ah]) static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) { @@ -9350,9 +9816,10 @@ TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, FPROUNDING_ZERO) TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) -static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, - int rd, int rn, int data, - gen_helper_gvec_2_ptr * const fns[3]) +static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, + bool is_q, int rd, int rn, int data, + gen_helper_gvec_2_ptr * const fns[3], + ARMFPStatusFlavour fpsttype) { int check = fp_access_check_vector_hsd(s, is_q, esz); TCGv_ptr fpst; @@ -9361,7 +9828,7 @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, return check == 0; } - fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); + fpst = fpstatus_ptr(fpsttype); tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), fpst, is_q ? 16 : 8, vec_full_reg_size(s), @@ -9369,6 +9836,23 @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, return true; } +static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, + int rd, int rn, int data, + gen_helper_gvec_2_ptr * const fns[3]) +{ + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, + esz == MO_16 ? FPST_A64_F16 : + FPST_A64); +} + +static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, + int rd, int rn, int data, + gen_helper_gvec_2_ptr * const fns[3]) +{ + return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, + fns, select_ah_fpst(s, esz)); +} + static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { gen_helper_gvec_vcvt_sh, gen_helper_gvec_vcvt_sf, @@ -9478,14 +9962,26 @@ static gen_helper_gvec_2_ptr * const f_frecpe[] = { gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d, }; -TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) +static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { + gen_helper_gvec_frecpe_h, + gen_helper_gvec_frecpe_rpres_s, + gen_helper_gvec_frecpe_d, +}; +TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) static gen_helper_gvec_2_ptr * const f_frsqrte[] = { gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d, }; -TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) +static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { + gen_helper_gvec_frsqrte_h, + gen_helper_gvec_frsqrte_rpres_s, + gen_helper_gvec_frsqrte_d, +}; +TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, + s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) { @@ -9655,6 +10151,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); + dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); + dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; |