diff options
Diffstat (limited to 'target/arm/translate-sve.c')
| -rw-r--r-- | target/arm/translate-sve.c | 81 |
1 files changed, 47 insertions, 34 deletions
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index 374051cd20..667879564f 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -1625,7 +1625,7 @@ static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) /* Detect signed overflow for subtraction. */ tcg_gen_xor_i64(t0, reg, val); tcg_gen_sub_i64(t1, reg, val); - tcg_gen_xor_i64(reg, reg, t0); + tcg_gen_xor_i64(reg, reg, t1); tcg_gen_and_i64(t0, t0, reg); /* Bound the result. */ @@ -3173,19 +3173,19 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn) static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn) { - if (!sve_access_check(s)) { - return true; - } - - TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1); - TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1); - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 op0, op1, t0, t1, tmax; TCGv_i32 t2, t3; TCGv_ptr ptr; unsigned desc, vsz = vec_full_reg_size(s); TCGCond cond; + if (!sve_access_check(s)) { + return true; + } + + op0 = read_cpu_reg(s, a->rn, 1); + op1 = read_cpu_reg(s, a->rm, 1); + if (!a->sf) { if (a->u) { tcg_gen_ext32u_i64(op0, op0); @@ -3198,32 +3198,47 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn) /* For the helper, compress the different conditions into a computation * of how many iterations for which the condition is true. - * - * This is slightly complicated by 0 <= UINT64_MAX, which is nominally - * 2**64 iterations, overflowing to 0. Of course, predicate registers - * aren't that large, so any value >= predicate size is sufficient. */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); tcg_gen_sub_i64(t0, op1, op0); - /* t0 = MIN(op1 - op0, vsz). */ - tcg_gen_movi_i64(t1, vsz); - tcg_gen_umin_i64(t0, t0, t1); + tmax = tcg_const_i64(vsz >> a->esz); if (a->eq) { /* Equality means one more iteration. */ tcg_gen_addi_i64(t0, t0, 1); + + /* If op1 is max (un)signed integer (and the only time the addition + * above could overflow), then we produce an all-true predicate by + * setting the count to the vector length. This is because the + * pseudocode is described as an increment + compare loop, and the + * max integer would always compare true. + */ + tcg_gen_movi_i64(t1, (a->sf + ? (a->u ? UINT64_MAX : INT64_MAX) + : (a->u ? UINT32_MAX : INT32_MAX))); + tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0); } - /* t0 = (condition true ? t0 : 0). */ + /* Bound to the maximum. */ + tcg_gen_umin_i64(t0, t0, tmax); + tcg_temp_free_i64(tmax); + + /* Set the count to zero if the condition is false. */ cond = (a->u ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU) : (a->eq ? TCG_COND_LE : TCG_COND_LT)); tcg_gen_movi_i64(t1, 0); tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1); + tcg_temp_free_i64(t1); + /* Since we're bounded, pass as a 32-bit type. */ t2 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(t2, t0); tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); + + /* Scale elements to bits. */ + tcg_gen_shli_i32(t2, t2, a->esz); desc = (vsz / 8) - 2; desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz); @@ -4078,7 +4093,7 @@ static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg, static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn) { - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh); + return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh); } static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn) @@ -4088,7 +4103,7 @@ static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn) static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn) { - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh); + return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh); } static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn) @@ -4357,12 +4372,11 @@ static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn) * The load should begin at the address Rn + IMM. */ -static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len, - int rn, int imm) +static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) { - uint32_t len_align = QEMU_ALIGN_DOWN(len, 8); - uint32_t len_remain = len % 8; - uint32_t nparts = len / 8 + ctpop8(len_remain); + int len_align = QEMU_ALIGN_DOWN(len, 8); + int len_remain = len % 8; + int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); TCGv_i64 addr, t0, t1; @@ -4443,12 +4457,11 @@ static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len, } /* Similarly for stores. */ -static void do_str(DisasContext *s, uint32_t vofs, uint32_t len, - int rn, int imm) +static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) { - uint32_t len_align = QEMU_ALIGN_DOWN(len, 8); - uint32_t len_remain = len % 8; - uint32_t nparts = len / 8 + ctpop8(len_remain); + int len_align = QEMU_ALIGN_DOWN(len, 8); + int len_remain = len % 8; + int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); TCGv_i64 addr, t0; @@ -4652,8 +4665,7 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn) } if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); - tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), - (a->nreg + 1) << dtype_msz(a->dtype)); + tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); } @@ -4806,6 +4818,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) unsigned vsz = vec_full_reg_size(s); unsigned psz = pred_full_reg_size(s); unsigned esz = dtype_esz[a->dtype]; + unsigned msz = dtype_msz(a->dtype); TCGLabel *over = gen_new_label(); TCGv_i64 temp; @@ -4829,7 +4842,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) /* Load the data. */ temp = tcg_temp_new_i64(); - tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz); + tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s), s->be_data | dtype_mop[a->dtype]); @@ -4885,7 +4898,7 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn) } if (sve_access_check(s)) { TCGv_i64 addr = new_tmp_a64(s); - tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz); + tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); } |