Diffstat (limited to 'target-arm')
-rw-r--r--  target-arm/arm-semi.c       |    9
-rw-r--r--  target-arm/cpu.c            |   15
-rw-r--r--  target-arm/cpu.h            |   15
-rw-r--r--  target-arm/helper-a64.c     |  178
-rw-r--r--  target-arm/helper-a64.h     |   10
-rw-r--r--  target-arm/helper.c         |  575
-rw-r--r--  target-arm/helper.h         |   45
-rw-r--r--  target-arm/neon_helper.c    |  187
-rw-r--r--  target-arm/op_helper.c      |   44
-rw-r--r--  target-arm/translate-a64.c  | 1517
-rw-r--r--  target-arm/translate.c      |   37
-rw-r--r--  target-arm/translate.h      |    8
12 files changed, 2337 insertions, 303 deletions
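Much of the non-AArch64 churn below is QOM plumbing: per-CPU state queries move off CPUARMState and onto CPUState/CPUClass hooks. As a sketch of the shape this takes (the has_work hook and CPU_GET_CLASS are real QEMU interfaces of this era; the generic dispatcher below is a simplified rendering of the code in qom/cpu.c, not copied from this commit):

static bool arm_cpu_has_work(CPUState *cs)
{
    /* Work is pending iff an interrupt line this core reacts to is raised */
    return cs->interrupt_request &
        (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB);
}

bool cpu_has_work(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->has_work) {
        return cc->has_work(cpu);
    }
    return false;
}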
diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c index ee469c49c5..ebb5235521 100644 --- a/target-arm/arm-semi.c +++ b/target-arm/arm-semi.c @@ -127,7 +127,7 @@ static void arm_semi_cb(CPUState *cs, target_ulong ret, target_ulong err) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; #ifdef CONFIG_USER_ONLY - TaskState *ts = env->opaque; + TaskState *ts = cs->opaque; #endif if (ret == (target_ulong)-1) { @@ -164,7 +164,7 @@ static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err) cpu_memory_rw_debug(cs, env->regs[13]-64+32, (uint8_t *)&size, 4, 0); env->regs[0] = be32_to_cpu(size); #ifdef CONFIG_USER_ONLY - ((TaskState *)env->opaque)->swi_errno = err; + ((TaskState *)cs->opaque)->swi_errno = err; #else syscall_err = err; #endif @@ -183,6 +183,7 @@ static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err) uint32_t do_arm_semihosting(CPUARMState *env) { ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); target_ulong args; target_ulong arg0, arg1, arg2, arg3; char * s; @@ -190,7 +191,7 @@ uint32_t do_arm_semihosting(CPUARMState *env) uint32_t ret; uint32_t len; #ifdef CONFIG_USER_ONLY - TaskState *ts = env->opaque; + TaskState *ts = cs->opaque; #else CPUARMState *ts = env; #endif @@ -554,7 +555,7 @@ uint32_t do_arm_semihosting(CPUARMState *env) exit(0); default: fprintf(stderr, "qemu: Unsupported SemiHosting SWI 0x%02x\n", nr); - cpu_dump_state(CPU(cpu), stderr, fprintf, 0); + cpu_dump_state(cs, stderr, fprintf, 0); abort(); } } diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 1ce8a9bc38..c32d8c4855 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -36,6 +36,12 @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value) cpu->env.regs[15] = value; } +static bool arm_cpu_has_work(CPUState *cs) +{ + return cs->interrupt_request & + (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB); +} + static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) { /* Reset a single ARMCPRegInfo register */ @@ -76,7 +82,7 @@ static void arm_cpu_reset(CPUState *s) acc->parent_reset(s); - memset(env, 0, offsetof(CPUARMState, breakpoints)); + memset(env, 0, offsetof(CPUARMState, features)); g_hash_table_foreach(cpu->cp_regs, cp_reg_reset, cpu); env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0; @@ -143,7 +149,7 @@ static void arm_cpu_reset(CPUState *s) &env->vfp.fp_status); set_float_detect_tininess(float_tininess_before_rounding, &env->vfp.standard_fp_status); - tlb_flush(env, 1); + tlb_flush(s, 1); /* Reset is a state change for some CPUARMState fields which we * bake assumptions about into translated code, so we need to * tb_flush(). 
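Most of the mechanical edits that follow repeat one pattern: a helper that receives a CPUARMState must recover the enclosing CPUState before calling tlb_flush(), cpu_abort() and friends, via arm_env_get_cpu() plus the CPU() QOM cast. A minimal sketch of why that upcast is just constant-offset pointer arithmetic (struct layouts abbreviated for illustration; the real definitions live in target-arm/cpu.h and include/qom/cpu.h):

#include <stddef.h>

/* abbreviated stand-ins for the real QOM structures */
typedef struct CPUState { int interrupt_request; /* ... */ } CPUState;
typedef struct CPUARMState { unsigned regs[16]; /* ... */ } CPUARMState;

typedef struct ARMCPU {
    CPUState parent_obj;   /* base QOM object, embedded first */
    CPUARMState env;       /* target state, embedded by value */
} ARMCPU;

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
{
    /* env lives inside ARMCPU, so subtracting its offset yields the CPU */
    return container_of(env, ARMCPU, env);
}

/* hence tlb_flush(CPU(arm_env_get_cpu(env)), 1) replaces tlb_flush(env, 1) */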
@@ -1001,12 +1007,15 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->reset = arm_cpu_reset; cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; cc->do_interrupt = arm_cpu_do_interrupt; cc->dump_state = arm_cpu_dump_state; cc->set_pc = arm_cpu_set_pc; cc->gdb_read_register = arm_cpu_gdb_read_register; cc->gdb_write_register = arm_cpu_gdb_write_register; -#ifndef CONFIG_USER_ONLY +#ifdef CONFIG_USER_ONLY + cc->handle_mmu_fault = arm_cpu_handle_mmu_fault; +#else cc->get_phys_page_debug = arm_cpu_get_phys_page_debug; cc->vmsd = &vmstate_arm_cpu; #endif diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 49fef3fcbe..bf37cd60d0 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -222,6 +222,10 @@ typedef struct CPUARMState { uint64_t dbgbcr[16]; /* breakpoint control registers */ uint64_t dbgwvr[16]; /* watchpoint value registers */ uint64_t dbgwcr[16]; /* watchpoint control registers */ + /* If the counter is enabled, this stores the last time the counter + * was reset. Otherwise it stores the counter value + */ + uint32_t c15_ccnt; } cp15; struct { @@ -335,9 +339,8 @@ static inline bool is_a64(CPUARMState *env) is returned if the signal was handled by the virtual CPU. */ int cpu_arm_signal_handler(int host_signum, void *pinfo, void *puc); -int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw, - int mmu_idx); -#define cpu_handle_mmu_fault cpu_arm_handle_mmu_fault +int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw, + int mmu_idx); /* SCTLR bit meanings. Several bits have been reused in newer * versions of the architecture; in that case we define constants @@ -1162,12 +1165,6 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, *cs_base = 0; } -static inline bool cpu_has_work(CPUState *cpu) -{ - return cpu->interrupt_request & - (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXITTB); -} - #include "exec/exec-all.h" static inline void cpu_pc_from_tb(CPUARMState *env, TranslationBlock *tb) diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index c2ce33ee88..ec0258295f 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -60,6 +60,11 @@ uint32_t HELPER(cls32)(uint32_t x) return clrsb32(x); } +uint32_t HELPER(clz32)(uint32_t x) +{ + return clz32(x); +} + uint64_t HELPER(rbit64)(uint64_t x) { /* assign the correct byte position */ @@ -180,6 +185,36 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices, return result; } +/* Helper function for 64 bit polynomial multiply case: + * perform PolynomialMult(op1, op2) and return either the top or + * bottom half of the 128 bit result. 
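 * (Illustrative aside, not part of the patch: "polynomial" means carry-less,
 *  so the partial products are combined with XOR instead of ADD. For example
 *  PolynomialMult(0b11, 0b11) = 0b11 ^ (0b11 << 1) = 0b101, i.e. 3 x 3 = 5
 *  rather than 9; the lo/hi helpers below return the two 64-bit halves of
 *  the full 128-bit product.)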
+ */ +uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + for (bitnum = 0; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 << bitnum; + } + } + return res; +} +uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2) +{ + int bitnum; + uint64_t res = 0; + + /* bit 0 of op1 can't influence the high 64 bits at all */ + for (bitnum = 1; bitnum < 64; bitnum++) { + if (op1 & (1ULL << bitnum)) { + res ^= op2 >> (64 - bitnum); + } + } + return res; +} + /* 64bit/double versions of the neon float compare functions */ uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) { @@ -258,3 +293,146 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) } return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); } + +/* Pairwise long add: add pairs of adjacent elements into + * double-width elements in the result (eg _s8 is an 8x8->16 op) + */ +uint64_t HELPER(neon_addlp_s8)(uint64_t a) +{ + uint64_t nsignmask = 0x0080008000800080ULL; + uint64_t wsignmask = 0x8000800080008000ULL; + uint64_t elementmask = 0x00ff00ff00ff00ffULL; + uint64_t tmp1, tmp2; + uint64_t res, signres; + + /* Extract odd elements, sign extend each to a 16 bit field */ + tmp1 = a & elementmask; + tmp1 ^= nsignmask; + tmp1 |= wsignmask; + tmp1 = (tmp1 - nsignmask) ^ wsignmask; + /* Ditto for the even elements */ + tmp2 = (a >> 8) & elementmask; + tmp2 ^= nsignmask; + tmp2 |= wsignmask; + tmp2 = (tmp2 - nsignmask) ^ wsignmask; + + /* calculate the result by summing bits 0..14, 16..22, etc, + * and then adjusting the sign bits 15, 23, etc manually. + * This ensures the addition can't overflow the 16 bit field. + */ + signres = (tmp1 ^ tmp2) & wsignmask; + res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); + res ^= signres; + + return res; +} + +uint64_t HELPER(neon_addlp_u8)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x00ff00ff00ff00ffULL; + tmp += (a >> 8) & 0x00ff00ff00ff00ffULL; + return tmp; +} + +uint64_t HELPER(neon_addlp_s16)(uint64_t a) +{ + int32_t reslo, reshi; + + reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); + reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); + + return (uint32_t)reslo | (((uint64_t)reshi) << 32); +} + +uint64_t HELPER(neon_addlp_u16)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x0000ffff0000ffffULL; + tmp += (a >> 16) & 0x0000ffff0000ffffULL; + return tmp; +} + +/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ +float32 HELPER(frecpx_f32)(float32 a, void *fpstp) +{ + float_status *fpst = fpstp; + uint32_t val32, sbit; + int32_t exp; + + if (float32_is_any_nan(a)) { + float32 nan = a; + if (float32_is_signaling_nan(a)) { + float_raise(float_flag_invalid, fpst); + nan = float32_maybe_silence_nan(a); + } + if (fpst->default_nan_mode) { + nan = float32_default_nan; + } + return nan; + } + + val32 = float32_val(a); + sbit = 0x80000000ULL & val32; + exp = extract32(val32, 23, 8); + + if (exp == 0) { + return make_float32(sbit | (0xfe << 23)); + } else { + return make_float32(sbit | (~exp & 0xff) << 23); + } +} + +float64 HELPER(frecpx_f64)(float64 a, void *fpstp) +{ + float_status *fpst = fpstp; + uint64_t val64, sbit; + int64_t exp; + + if (float64_is_any_nan(a)) { + float64 nan = a; + if (float64_is_signaling_nan(a)) { + float_raise(float_flag_invalid, fpst); + nan = float64_maybe_silence_nan(a); + } + if (fpst->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } + + val64 = float64_val(a); + sbit = 0x8000000000000000ULL & val64; + 
exp = extract64(float64_val(a), 52, 11); + + if (exp == 0) { + return make_float64(sbit | (0x7feULL << 52)); + } else { + return make_float64(sbit | (~exp & 0x7ffULL) << 52); + } +} + +float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env) +{ + /* Von Neumann rounding is implemented by using round-to-zero + * and then setting the LSB of the result if Inexact was raised. + */ + float32 r; + float_status *fpst = &env->vfp.fp_status; + float_status tstat = *fpst; + int exflags; + + set_float_rounding_mode(float_round_to_zero, &tstat); + set_float_exception_flags(0, &tstat); + r = float64_to_float32(a, &tstat); + r = float32_maybe_silence_nan(r); + exflags = get_float_exception_flags(&tstat); + if (exflags & float_flag_inexact) { + r = make_float32(float32_val(r) | 1); + } + exflags |= get_float_exception_flags(fpst); + set_float_exception_flags(exflags, fpst); + return r; +} diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index ab9933cab0..3f05bedcca 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -21,12 +21,15 @@ DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_1(clz64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(cls64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_FLAGS_1(cls32, TCG_CALL_NO_RWG_SE, i32, i32) +DEF_HELPER_FLAGS_1(clz32, TCG_CALL_NO_RWG_SE, i32, i32) DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64) DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr) DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr) DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr) DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32) +DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr) DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr) @@ -36,3 +39,10 @@ DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr) DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr) +DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64) +DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) diff --git a/target-arm/helper.c b/target-arm/helper.c index 90f85f1899..55077ed1b6 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -13,6 +13,11 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address, int access_type, int is_user, hwaddr *phys_ptr, int *prot, target_ulong *page_size); + +/* Definitions for the PMCCNTR and PMCR registers */ +#define PMCRD 0x8 +#define PMCRC 0x4 +#define PMCRE 0x1 #endif static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg) @@ -298,17 +303,21 @@ void init_cpreg_list(ARMCPU *cpu) static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + ARMCPU *cpu = arm_env_get_cpu(env); + env->cp15.c3 = value; - tlb_flush(env, 1); /* Flush TLB as domain not tracked in TLB 
*/ + tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */ } static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + ARMCPU *cpu = arm_env_get_cpu(env); + if (env->cp15.c13_fcse != value) { /* Unlike real hardware the qemu TLB uses virtual addresses, * not modified virtual addresses, so this causes a TLB flush. */ - tlb_flush(env, 1); + tlb_flush(CPU(cpu), 1); env->cp15.c13_fcse = value; } } @@ -316,12 +325,14 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + ARMCPU *cpu = arm_env_get_cpu(env); + if (env->cp15.c13_context != value && !arm_feature(env, ARM_FEATURE_MPU)) { /* For VMSA (when not using the LPAE long descriptor page table * format) this register includes the ASID, so do a TLB flush. * For PMSA it is purely a process ID and no action is needed. */ - tlb_flush(env, 1); + tlb_flush(CPU(cpu), 1); } env->cp15.c13_context = value; } @@ -330,28 +341,36 @@ static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate all (TLBIALL) */ - tlb_flush(env, 1); + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush(CPU(cpu), 1); } static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */ - tlb_flush_page(env, value & TARGET_PAGE_MASK); + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); } static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate by ASID (TLBIASID) */ - tlb_flush(env, value == 0); + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush(CPU(cpu), value == 0); } static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */ - tlb_flush_page(env, value & TARGET_PAGE_MASK); + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK); } static const ARMCPRegInfo cp_reginfo[] = { @@ -469,7 +488,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri) { - /* Perfomance monitor registers user accessibility is controlled + /* Performance monitor registers user accessibility is controlled * by PMUSERENR. 
 */
    if (arm_current_pl(env) == 0 && !env->cp15.c9_pmuserenr) {
@@ -478,13 +497,84 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
     return CP_ACCESS_OK;
 }
 
+#ifndef CONFIG_USER_ONLY
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
+    /* Don't compute the number of ticks in user mode */
+    uint32_t temp_ticks;
+
+    temp_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
+                 get_ticks_per_sec() / 1000000;
+
+    if (env->cp15.c9_pmcr & PMCRE) {
+        /* If the counter is enabled */
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            env->cp15.c15_ccnt = (temp_ticks/64) - env->cp15.c15_ccnt;
+        } else {
+            env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+        }
+    }
+
+    if (value & PMCRC) {
+        /* The counter has been reset */
+        env->cp15.c15_ccnt = 0;
+    }
+
     /* only the DP, X, D and E bits are writable */
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
+
+    if (env->cp15.c9_pmcr & PMCRE) {
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            temp_ticks /= 64;
+        }
+        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+    }
+}
+
+static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    uint32_t total_ticks;
+
+    if (!(env->cp15.c9_pmcr & PMCRE)) {
+        /* Counter is disabled, do not change value */
+        return env->cp15.c15_ccnt;
+    }
+
+    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
+                  get_ticks_per_sec() / 1000000;
+
+    if (env->cp15.c9_pmcr & PMCRD) {
+        /* Increment once every 64 processor clock cycles */
+        total_ticks /= 64;
+    }
+    return total_ticks - env->cp15.c15_ccnt;
+}
+
+static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
+{
+    uint32_t total_ticks;
+
+    if (!(env->cp15.c9_pmcr & PMCRE)) {
+        /* Counter is disabled, set the absolute value */
+        env->cp15.c15_ccnt = value;
+        return;
+    }
+
+    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
+                  get_ticks_per_sec() / 1000000;
+
+    if (env->cp15.c9_pmcr & PMCRD) {
+        /* Increment once every 64 processor clock cycles */
+        total_ticks /= 64;
+    }
+    env->cp15.c15_ccnt = total_ticks - value;
 }
+#endif
 
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
                              uint64_t value)
@@ -604,10 +694,12 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
     { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
       .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
       .accessfn = pmreg_access },
-    /* Unimplemented, RAZ/WI. */
+#ifndef CONFIG_USER_ONLY
     { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
-      .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+      .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
+      .readfn = pmccntr_read, .writefn = pmccntr_write,
       .accessfn = pmreg_access },
+#endif
     { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
       .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
@@ -1270,11 +1362,13 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                              uint64_t value)
 {
+    ARMCPU *cpu = arm_env_get_cpu(env);
+
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
         /* With LPAE the TTBCR could result in a change of ASID
          * via the TTBCR.A1 bit, so do a TLB flush.
*/ - tlb_flush(env, 1); + tlb_flush(CPU(cpu), 1); } vmsa_ttbcr_raw_write(env, ri, value); } @@ -1289,8 +1383,10 @@ static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + ARMCPU *cpu = arm_env_get_cpu(env); + /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */ - tlb_flush(env, 1); + tlb_flush(CPU(cpu), 1); env->cp15.c2_control = value; } @@ -1301,7 +1397,9 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, * must flush the TLB. */ if (cpreg_field_is_64bit(ri)) { - tlb_flush(env, 1); + ARMCPU *cpu = arm_env_get_cpu(env); + + tlb_flush(CPU(cpu), 1); } raw_write(env, ri, value); } @@ -1608,24 +1706,27 @@ static void tlbi_aa64_va_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate by VA (AArch64 version) */ + ARMCPU *cpu = arm_env_get_cpu(env); uint64_t pageaddr = value << 12; - tlb_flush_page(env, pageaddr); + tlb_flush_page(CPU(cpu), pageaddr); } static void tlbi_aa64_vaa_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate by VA, all ASIDs (AArch64 version) */ + ARMCPU *cpu = arm_env_get_cpu(env); uint64_t pageaddr = value << 12; - tlb_flush_page(env, pageaddr); + tlb_flush_page(CPU(cpu), pageaddr); } static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Invalidate by ASID (AArch64 version) */ + ARMCPU *cpu = arm_env_get_cpu(env); int asid = extract64(value, 48, 16); - tlb_flush(env, asid == 0); + tlb_flush(CPU(cpu), asid == 0); } static const ARMCPRegInfo v8_cp_reginfo[] = { @@ -1751,10 +1852,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + ARMCPU *cpu = arm_env_get_cpu(env); + env->cp15.c1_sys = value; /* ??? Lots of these bits are not implemented. */ /* This may enable/disable the MMU, so do a TLB flush. */ - tlb_flush(env, 1); + tlb_flush(CPU(cpu), 1); } static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri) @@ -1873,21 +1976,25 @@ void register_cp_regs_for_features(ARMCPU *cpu) } if (arm_feature(env, ARM_FEATURE_V7)) { /* v7 performance monitor control register: same implementor - * field as main ID register, and we implement no event counters. + * field as main ID register, and we implement only the cycle + * count register. 
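 * (Illustrative aside, not part of the patch: while PMCRE is set, c15_ccnt
 *  stores the difference between the scaled virtual-clock tick count and
 *  the architected counter value. E.g. writing 0 to PMCCNTR when the tick
 *  count is 1000 stores c15_ccnt = 1000; a read when it reaches 1500
 *  returns 1500 - 1000 = 500 cycles.)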
*/ +#ifndef CONFIG_USER_ONLY ARMCPRegInfo pmcr = { .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, .access = PL0_RW, .resetvalue = cpu->midr & 0xff000000, + .type = ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), .accessfn = pmreg_access, .writefn = pmcr_write, .raw_writefn = raw_write, }; + define_one_arm_cp_reg(cpu, &pmcr); +#endif ARMCPRegInfo clidr = { .name = "CLIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr }; - define_one_arm_cp_reg(cpu, &pmcr); define_one_arm_cp_reg(cpu, &clidr); define_arm_cp_regs(cpu, v7_cp_reginfo); } else { @@ -2105,19 +2212,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPU *cpu_arm_init(const char *cpu_model) { - ARMCPU *cpu; - ObjectClass *oc; - - oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model); - if (!oc) { - return NULL; - } - cpu = ARM_CPU(object_new(object_class_get_name(oc))); - - /* TODO this should be set centrally, once possible */ - object_property_set_bool(OBJECT(cpu), true, "realized", NULL); - - return cpu; + return ARM_CPU(cpu_generic_init(TYPE_ARM_CPU, cpu_model)); } void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) @@ -2478,7 +2573,7 @@ uint32_t cpsr_read(CPUARMState *env) (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27) | (env->thumb << 5) | ((env->condexec_bits & 3) << 25) | ((env->condexec_bits & 0xfc) << 8) - | (env->GE << 16) | env->daif; + | (env->GE << 16) | (env->daif & CPSR_AIF); } void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask) @@ -2580,20 +2675,20 @@ uint32_t HELPER(rbit)(uint32_t x) void arm_cpu_do_interrupt(CPUState *cs) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - - env->exception_index = -1; + cs->exception_index = -1; } -int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw, - int mmu_idx) +int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw, + int mmu_idx) { + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + if (rw == 2) { - env->exception_index = EXCP_PREFETCH_ABORT; + cs->exception_index = EXCP_PREFETCH_ABORT; env->cp15.c6_insn = address; } else { - env->exception_index = EXCP_DATA_ABORT; + cs->exception_index = EXCP_DATA_ABORT; env->cp15.c6_data = address; } return 1; @@ -2602,29 +2697,40 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw, /* These should probably raise undefined insn exceptions. 
*/ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) { - cpu_abort(env, "v7m_mrs %d\n", reg); + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "v7m_msr %d\n", reg); } uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) { - cpu_abort(env, "v7m_mrs %d\n", reg); + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "v7m_mrs %d\n", reg); return 0; } void switch_mode(CPUARMState *env, int mode) { - if (mode != ARM_CPU_MODE_USR) - cpu_abort(env, "Tried to switch out of user mode\n"); + ARMCPU *cpu = arm_env_get_cpu(env); + + if (mode != ARM_CPU_MODE_USR) { + cpu_abort(CPU(cpu), "Tried to switch out of user mode\n"); + } } void HELPER(set_r13_banked)(CPUARMState *env, uint32_t mode, uint32_t val) { - cpu_abort(env, "banked r13 write\n"); + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "banked r13 write\n"); } uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t mode) { - cpu_abort(env, "banked r13 read\n"); + ARMCPU *cpu = arm_env_get_cpu(env); + + cpu_abort(CPU(cpu), "banked r13 read\n"); return 0; } @@ -2681,15 +2787,17 @@ void switch_mode(CPUARMState *env, int mode) static void v7m_push(CPUARMState *env, uint32_t val) { - CPUState *cs = ENV_GET_CPU(env); + CPUState *cs = CPU(arm_env_get_cpu(env)); + env->regs[13] -= 4; stl_phys(cs->as, env->regs[13], val); } static uint32_t v7m_pop(CPUARMState *env) { - CPUState *cs = ENV_GET_CPU(env); + CPUState *cs = CPU(arm_env_get_cpu(env)); uint32_t val; + val = ldl_phys(cs->as, env->regs[13]); env->regs[13] += 4; return val; @@ -2777,7 +2885,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) uint32_t lr; uint32_t addr; - arm_log_exception(env->exception_index); + arm_log_exception(cs->exception_index); lr = 0xfffffff1; if (env->v7m.current_sp) @@ -2789,7 +2897,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) handle it. */ /* TODO: Need to escalate if the current priority is higher than the one we're raising. */ - switch (env->exception_index) { + switch (cs->exception_index) { case EXCP_UDEF: armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); return; @@ -2821,7 +2929,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) do_v7m_exception_exit(env); return; default: - cpu_abort(env, "Unhandled exception 0x%x\n", env->exception_index); + cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); return; /* Never happens. Keep compiler happy. */ } @@ -2862,10 +2970,10 @@ void arm_cpu_do_interrupt(CPUState *cs) assert(!IS_M(env)); - arm_log_exception(env->exception_index); + arm_log_exception(cs->exception_index); /* TODO: Vectored interrupt controller. */ - switch (env->exception_index) { + switch (cs->exception_index) { case EXCP_UDEF: new_mode = ARM_CPU_MODE_UND; addr = 0x04; @@ -2946,7 +3054,7 @@ void arm_cpu_do_interrupt(CPUState *cs) offset = 4; break; default: - cpu_abort(env, "Unhandled exception 0x%x\n", env->exception_index); + cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); return; /* Never happens. Keep compiler happy. */ } /* High vectors. 
*/ @@ -3053,7 +3161,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type, int is_user, hwaddr *phys_ptr, int *prot, target_ulong *page_size) { - CPUState *cs = ENV_GET_CPU(env); + CPUState *cs = CPU(arm_env_get_cpu(env)); int code; uint32_t table; uint32_t desc; @@ -3149,7 +3257,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type, int is_user, hwaddr *phys_ptr, int *prot, target_ulong *page_size) { - CPUState *cs = ENV_GET_CPU(env); + CPUState *cs = CPU(arm_env_get_cpu(env)); int code; uint32_t table; uint32_t desc; @@ -3272,7 +3380,7 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address, hwaddr *phys_ptr, int *prot, target_ulong *page_size_ptr) { - CPUState *cs = ENV_GET_CPU(env); + CPUState *cs = CPU(arm_env_get_cpu(env)); /* Read an LPAE long-descriptor translation table. */ MMUFaultType fault_type = translation_fault; uint32_t level = 1; @@ -3552,9 +3660,11 @@ static inline int get_phys_addr(CPUARMState *env, uint32_t address, } } -int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, - int access_type, int mmu_idx) +int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, + int access_type, int mmu_idx) { + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; hwaddr phys_addr; target_ulong page_size; int prot; @@ -3567,20 +3677,20 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, /* Map a single [sub]page. */ phys_addr &= ~(hwaddr)0x3ff; address &= ~(uint32_t)0x3ff; - tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size); + tlb_set_page(cs, address, phys_addr, prot, mmu_idx, page_size); return 0; } if (access_type == 2) { env->cp15.c5_insn = ret; env->cp15.c6_insn = address; - env->exception_index = EXCP_PREFETCH_ABORT; + cs->exception_index = EXCP_PREFETCH_ABORT; } else { env->cp15.c5_data = ret; if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6)) env->cp15.c5_data |= (1 << 11); env->cp15.c6_data = address; - env->exception_index = EXCP_DATA_ABORT; + cs->exception_index = EXCP_DATA_ABORT; } return 1; } @@ -3622,6 +3732,8 @@ uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t mode) uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) { + ARMCPU *cpu = arm_env_get_cpu(env); + switch (reg) { case 0: /* APSR */ return xpsr_read(env) & 0xf8000000; @@ -3652,13 +3764,15 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) return env->v7m.control; default: /* ??? For debugging only. */ - cpu_abort(env, "Unimplemented system register read (%d)\n", reg); + cpu_abort(CPU(cpu), "Unimplemented system register read (%d)\n", reg); return 0; } } void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) { + ARMCPU *cpu = arm_env_get_cpu(env); + switch (reg) { case 0: /* APSR */ xpsr_write(env, val, 0xf8000000); @@ -3721,7 +3835,7 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val) break; default: /* ??? For debugging only. */ - cpu_abort(env, "Unimplemented system register write (%d)\n", reg); + cpu_abort(CPU(cpu), "Unimplemented system register write (%d)\n", reg); return; } } @@ -4406,16 +4520,21 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env) * int->float conversions at run-time. */ #define float64_256 make_float64(0x4070000000000000LL) #define float64_512 make_float64(0x4080000000000000LL) +#define float32_maxnorm make_float32(0x7f7fffff) +#define float64_maxnorm make_float64(0x7fefffffffffffffLL) -/* The algorithm that must be used to calculate the estimate - * is specified by the ARM ARM. 
+/* Reciprocal functions + * + * The algorithm that must be used to calculate the estimate + * is specified by the ARM ARM, see FPRecipEstimate() */ -static float64 recip_estimate(float64 a, CPUARMState *env) + +static float64 recip_estimate(float64 a, float_status *real_fp_status) { /* These calculations mustn't set any fp exception flags, * so we use a local copy of the fp_status. */ - float_status dummy_status = env->vfp.standard_fp_status; + float_status dummy_status = *real_fp_status; float_status *s = &dummy_status; /* q = (int)(a * 512.0) */ float64 q = float64_mul(float64_512, a, s); @@ -4436,56 +4555,178 @@ static float64 recip_estimate(float64 a, CPUARMState *env) return float64_div(int64_to_float64(q_int, s), float64_256, s); } -float32 HELPER(recpe_f32)(float32 a, CPUARMState *env) +/* Common wrapper to call recip_estimate */ +static float64 call_recip_estimate(float64 num, int off, float_status *fpst) { - float_status *s = &env->vfp.standard_fp_status; - float64 f64; - uint32_t val32 = float32_val(a); + uint64_t val64 = float64_val(num); + uint64_t frac = extract64(val64, 0, 52); + int64_t exp = extract64(val64, 52, 11); + uint64_t sbit; + float64 scaled, estimate; - int result_exp; - int a_exp = (val32 & 0x7f800000) >> 23; - int sign = val32 & 0x80000000; + /* Generate the scaled number for the estimate function */ + if (exp == 0) { + if (extract64(frac, 51, 1) == 0) { + exp = -1; + frac = extract64(frac, 0, 50) << 2; + } else { + frac = extract64(frac, 0, 51) << 1; + } + } - if (float32_is_any_nan(a)) { - if (float32_is_signaling_nan(a)) { - float_raise(float_flag_invalid, s); + /* scaled = '0' : '01111111110' : fraction<51:44> : Zeros(44); */ + scaled = make_float64((0x3feULL << 52) + | extract64(frac, 44, 8) << 44); + + estimate = recip_estimate(scaled, fpst); + + /* Build new result */ + val64 = float64_val(estimate); + sbit = 0x8000000000000000ULL & val64; + exp = off - exp; + frac = extract64(val64, 0, 52); + + if (exp == 0) { + frac = 1ULL << 51 | extract64(frac, 1, 51); + } else if (exp == -1) { + frac = 1ULL << 50 | extract64(frac, 2, 50); + exp = 0; + } + + return make_float64(sbit | (exp << 52) | frac); +} + +static bool round_to_inf(float_status *fpst, bool sign_bit) +{ + switch (fpst->float_rounding_mode) { + case float_round_nearest_even: /* Round to Nearest */ + return true; + case float_round_up: /* Round to +Inf */ + return !sign_bit; + case float_round_down: /* Round to -Inf */ + return sign_bit; + case float_round_to_zero: /* Round to Zero */ + return false; + } + + g_assert_not_reached(); +} + +float32 HELPER(recpe_f32)(float32 input, void *fpstp) +{ + float_status *fpst = fpstp; + float32 f32 = float32_squash_input_denormal(input, fpst); + uint32_t f32_val = float32_val(f32); + uint32_t f32_sbit = 0x80000000ULL & f32_val; + int32_t f32_exp = extract32(f32_val, 23, 8); + uint32_t f32_frac = extract32(f32_val, 0, 23); + float64 f64, r64; + uint64_t r64_val; + int64_t r64_exp; + uint64_t r64_frac; + + if (float32_is_any_nan(f32)) { + float32 nan = f32; + if (float32_is_signaling_nan(f32)) { + float_raise(float_flag_invalid, fpst); + nan = float32_maybe_silence_nan(f32); } - return float32_default_nan; - } else if (float32_is_infinity(a)) { - return float32_set_sign(float32_zero, float32_is_neg(a)); - } else if (float32_is_zero_or_denormal(a)) { - if (!float32_is_zero(a)) { - float_raise(float_flag_input_denormal, s); + if (fpst->default_nan_mode) { + nan = float32_default_nan; } - float_raise(float_flag_divbyzero, s); - return 
float32_set_sign(float32_infinity, float32_is_neg(a)); - } else if (a_exp >= 253) { - float_raise(float_flag_underflow, s); - return float32_set_sign(float32_zero, float32_is_neg(a)); + return nan; + } else if (float32_is_infinity(f32)) { + return float32_set_sign(float32_zero, float32_is_neg(f32)); + } else if (float32_is_zero(f32)) { + float_raise(float_flag_divbyzero, fpst); + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else if ((f32_val & ~(1ULL << 31)) < (1ULL << 21)) { + /* Abs(value) < 2.0^-128 */ + float_raise(float_flag_overflow | float_flag_inexact, fpst); + if (round_to_inf(fpst, f32_sbit)) { + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else { + return float32_set_sign(float32_maxnorm, float32_is_neg(f32)); + } + } else if (f32_exp >= 253 && fpst->flush_to_zero) { + float_raise(float_flag_underflow, fpst); + return float32_set_sign(float32_zero, float32_is_neg(f32)); } - f64 = make_float64((0x3feULL << 52) - | ((int64_t)(val32 & 0x7fffff) << 29)); - result_exp = 253 - a_exp; + f64 = make_float64(((int64_t)(f32_exp) << 52) | (int64_t)(f32_frac) << 29); + r64 = call_recip_estimate(f64, 253, fpst); + r64_val = float64_val(r64); + r64_exp = extract64(r64_val, 52, 11); + r64_frac = extract64(r64_val, 0, 52); - f64 = recip_estimate(f64, env); + /* result = sign : result_exp<7:0> : fraction<51:29>; */ + return make_float32(f32_sbit | + (r64_exp & 0xff) << 23 | + extract64(r64_frac, 29, 24)); +} - val32 = sign - | ((result_exp & 0xff) << 23) - | ((float64_val(f64) >> 29) & 0x7fffff); - return make_float32(val32); +float64 HELPER(recpe_f64)(float64 input, void *fpstp) +{ + float_status *fpst = fpstp; + float64 f64 = float64_squash_input_denormal(input, fpst); + uint64_t f64_val = float64_val(f64); + uint64_t f64_sbit = 0x8000000000000000ULL & f64_val; + int64_t f64_exp = extract64(f64_val, 52, 11); + float64 r64; + uint64_t r64_val; + int64_t r64_exp; + uint64_t r64_frac; + + /* Deal with any special cases */ + if (float64_is_any_nan(f64)) { + float64 nan = f64; + if (float64_is_signaling_nan(f64)) { + float_raise(float_flag_invalid, fpst); + nan = float64_maybe_silence_nan(f64); + } + if (fpst->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } else if (float64_is_infinity(f64)) { + return float64_set_sign(float64_zero, float64_is_neg(f64)); + } else if (float64_is_zero(f64)) { + float_raise(float_flag_divbyzero, fpst); + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) { + /* Abs(value) < 2.0^-1024 */ + float_raise(float_flag_overflow | float_flag_inexact, fpst); + if (round_to_inf(fpst, f64_sbit)) { + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else { + return float64_set_sign(float64_maxnorm, float64_is_neg(f64)); + } + } else if (f64_exp >= 1023 && fpst->flush_to_zero) { + float_raise(float_flag_underflow, fpst); + return float64_set_sign(float64_zero, float64_is_neg(f64)); + } + + r64 = call_recip_estimate(f64, 2045, fpst); + r64_val = float64_val(r64); + r64_exp = extract64(r64_val, 52, 11); + r64_frac = extract64(r64_val, 0, 52); + + /* result = sign : result_exp<10:0> : fraction<51:0> */ + return make_float64(f64_sbit | + ((r64_exp & 0x7ff) << 52) | + r64_frac); } /* The algorithm that must be used to calculate the estimate * is specified by the ARM ARM. 
*/ -static float64 recip_sqrt_estimate(float64 a, CPUARMState *env) +static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status) { /* These calculations mustn't set any fp exception flags, * so we use a local copy of the fp_status. */ - float_status dummy_status = env->vfp.standard_fp_status; + float_status dummy_status = *real_fp_status; float_status *s = &dummy_status; float64 q; int64_t q_int; @@ -4532,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env) return float64_div(int64_to_float64(q_int, s), float64_256, s); } -float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env) +float32 HELPER(rsqrte_f32)(float32 input, void *fpstp) { - float_status *s = &env->vfp.standard_fp_status; + float_status *s = fpstp; + float32 f32 = float32_squash_input_denormal(input, s); + uint32_t val = float32_val(f32); + uint32_t f32_sbit = 0x80000000 & val; + int32_t f32_exp = extract32(val, 23, 8); + uint32_t f32_frac = extract32(val, 0, 23); + uint64_t f64_frac; + uint64_t val64; int result_exp; float64 f64; - uint32_t val; - uint64_t val64; - val = float32_val(a); - - if (float32_is_any_nan(a)) { - if (float32_is_signaling_nan(a)) { + if (float32_is_any_nan(f32)) { + float32 nan = f32; + if (float32_is_signaling_nan(f32)) { float_raise(float_flag_invalid, s); + nan = float32_maybe_silence_nan(f32); } - return float32_default_nan; - } else if (float32_is_zero_or_denormal(a)) { - if (!float32_is_zero(a)) { - float_raise(float_flag_input_denormal, s); + if (s->default_nan_mode) { + nan = float32_default_nan; } + return nan; + } else if (float32_is_zero(f32)) { float_raise(float_flag_divbyzero, s); - return float32_set_sign(float32_infinity, float32_is_neg(a)); - } else if (float32_is_neg(a)) { + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else if (float32_is_neg(f32)) { float_raise(float_flag_invalid, s); return float32_default_nan; - } else if (float32_is_infinity(a)) { + } else if (float32_is_infinity(f32)) { return float32_zero; } - /* Normalize to a double-precision value between 0.25 and 1.0, + /* Scale and normalize to a double-precision value between 0.25 and 1.0, * preserving the parity of the exponent. 
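 * (Illustrative aside, not part of the patch: sqrt halves the exponent, so
 *  the scaling must remember whether the exponent was odd or even; an even
 *  f32_exp selects the [0.5, 1.0) band (biased exponent 0x3fe), an odd one
 *  the [0.25, 0.5) band (0x3fd), before result_exp = (380 - f32_exp) / 2.)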
*/ - if ((val & 0x800000) == 0) { - f64 = make_float64(((uint64_t)(val & 0x80000000) << 32) + + f64_frac = ((uint64_t) f32_frac) << 29; + if (f32_exp == 0) { + while (extract64(f64_frac, 51, 1) == 0) { + f64_frac = f64_frac << 1; + f32_exp = f32_exp-1; + } + f64_frac = extract64(f64_frac, 0, 51) << 1; + } + + if (extract64(f32_exp, 0, 1) == 0) { + f64 = make_float64(((uint64_t) f32_sbit) << 32 | (0x3feULL << 52) - | ((uint64_t)(val & 0x7fffff) << 29)); + | f64_frac); } else { - f64 = make_float64(((uint64_t)(val & 0x80000000) << 32) + f64 = make_float64(((uint64_t) f32_sbit) << 32 | (0x3fdULL << 52) - | ((uint64_t)(val & 0x7fffff) << 29)); + | f64_frac); } - result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2; + result_exp = (380 - f32_exp) / 2; - f64 = recip_sqrt_estimate(f64, env); + f64 = recip_sqrt_estimate(f64, s); val64 = float64_val(f64); @@ -4583,8 +4839,72 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env) return make_float32(val); } -uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env) +float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) { + float_status *s = fpstp; + float64 f64 = float64_squash_input_denormal(input, s); + uint64_t val = float64_val(f64); + uint64_t f64_sbit = 0x8000000000000000ULL & val; + int64_t f64_exp = extract64(val, 52, 11); + uint64_t f64_frac = extract64(val, 0, 52); + int64_t result_exp; + uint64_t result_frac; + + if (float64_is_any_nan(f64)) { + float64 nan = f64; + if (float64_is_signaling_nan(f64)) { + float_raise(float_flag_invalid, s); + nan = float64_maybe_silence_nan(f64); + } + if (s->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } else if (float64_is_zero(f64)) { + float_raise(float_flag_divbyzero, s); + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else if (float64_is_neg(f64)) { + float_raise(float_flag_invalid, s); + return float64_default_nan; + } else if (float64_is_infinity(f64)) { + return float64_zero; + } + + /* Scale and normalize to a double-precision value between 0.25 and 1.0, + * preserving the parity of the exponent. 
*/ + + if (f64_exp == 0) { + while (extract64(f64_frac, 51, 1) == 0) { + f64_frac = f64_frac << 1; + f64_exp = f64_exp - 1; + } + f64_frac = extract64(f64_frac, 0, 51) << 1; + } + + if (extract64(f64_exp, 0, 1) == 0) { + f64 = make_float64(f64_sbit + | (0x3feULL << 52) + | f64_frac); + } else { + f64 = make_float64(f64_sbit + | (0x3fdULL << 52) + | f64_frac); + } + + result_exp = (3068 - f64_exp) / 2; + + f64 = recip_sqrt_estimate(f64, s); + + result_frac = extract64(float64_val(f64), 0, 52); + + return make_float64(f64_sbit | + ((result_exp & 0x7ff) << 52) | + result_frac); +} + +uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) +{ + float_status *s = fpstp; float64 f64; if ((a & 0x80000000) == 0) { @@ -4594,13 +4914,14 @@ uint32_t HELPER(recpe_u32)(uint32_t a, CPUARMState *env) f64 = make_float64((0x3feULL << 52) | ((int64_t)(a & 0x7fffffff) << 21)); - f64 = recip_estimate (f64, env); + f64 = recip_estimate(f64, s); return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); } -uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env) +uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp) { + float_status *fpst = fpstp; float64 f64; if ((a & 0xc0000000) == 0) { @@ -4615,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env) | ((uint64_t)(a & 0x3fffffff) << 22)); } - f64 = recip_sqrt_estimate(f64, env); + f64 = recip_sqrt_estimate(f64, fpst); return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); } diff --git a/target-arm/helper.h b/target-arm/helper.h index 276f3a9149..366c1b3ea5 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -50,6 +50,7 @@ DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) DEF_HELPER_2(exception, void, env, i32) DEF_HELPER_1(wfi, void, env) +DEF_HELPER_1(wfe, void, env) DEF_HELPER_3(cpsr_write, void, env, i32, i32) DEF_HELPER_1(cpsr_read, i32, env) @@ -166,10 +167,12 @@ DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) DEF_HELPER_3(recps_f32, f32, f32, f32, env) DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) -DEF_HELPER_2(recpe_f32, f32, f32, env) -DEF_HELPER_2(rsqrte_f32, f32, f32, env) -DEF_HELPER_2(recpe_u32, i32, i32, env) -DEF_HELPER_2(rsqrte_u32, i32, i32, env) +DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_2(recpe_u32, i32, i32, ptr) +DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) @@ -183,12 +186,20 @@ DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) /* neon_helper.c */ -DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_3(neon_uqadd_s8, 
TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64)
 DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32)
 DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32)
 DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32)
@@ -372,12 +383,14 @@ DEF_HELPER_2(neon_mull_s16, i64, i32, i32)
 DEF_HELPER_1(neon_negl_u16, i64, i64)
 DEF_HELPER_1(neon_negl_u32, i64, i64)
-DEF_HELPER_2(neon_qabs_s8, i32, env, i32)
-DEF_HELPER_2(neon_qabs_s16, i32, env, i32)
-DEF_HELPER_2(neon_qabs_s32, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
 DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 13752baf63..8d6f9a92f2 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -236,6 +236,171 @@ uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
     return res;
 }
 
+/* Unsigned saturating accumulate of signed value
+ *
+ * Op1/Rn is treated as signed
+ * Op2/Rd is treated as unsigned
+ *
+ * Explicit casting is used to ensure the correct sign extension of
+ * inputs. The result is treated as an unsigned value and saturated as such.
+ *
+ * We use a macro for the 8/16 bit cases which expects signed integers of va,
+ * vb, and vr for interim calculation and an unsigned 32 bit result value r.
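 * (Illustrative worked example, not from the patch: USATACC(8, 0) with
 *  a = 0x80 (va = -128) and b = 0x40 (vb = 64) gives vr = -64; vr < 0,
 *  so QC is set and the low byte of r saturates to 0.)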
+ */ + +#define USATACC(bits, shift) \ + do { \ + va = sextract32(a, shift, bits); \ + vb = extract32(b, shift, bits); \ + vr = va + vb; \ + if (vr > UINT##bits##_MAX) { \ + SET_QC(); \ + vr = UINT##bits##_MAX; \ + } else if (vr < 0) { \ + SET_QC(); \ + vr = 0; \ + } \ + r = deposit32(r, shift, bits, vr); \ + } while (0) + +uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int16_t va, vb, vr; + uint32_t r = 0; + + USATACC(8, 0); + USATACC(8, 8); + USATACC(8, 16); + USATACC(8, 24); + return r; +} + +uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int32_t va, vb, vr; + uint64_t r = 0; + + USATACC(16, 0); + USATACC(16, 16); + return r; +} + +#undef USATACC + +uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int64_t va = (int32_t)a; + int64_t vb = (uint32_t)b; + int64_t vr = va + vb; + if (vr > UINT32_MAX) { + SET_QC(); + vr = UINT32_MAX; + } else if (vr < 0) { + SET_QC(); + vr = 0; + } + return vr; +} + +uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t res; + res = a + b; + /* We only need to look at the pattern of SIGN bits to detect + * +ve/-ve saturation + */ + if (~a & b & ~res & SIGNBIT64) { + SET_QC(); + res = UINT64_MAX; + } else if (a & ~b & res & SIGNBIT64) { + SET_QC(); + res = 0; + } + return res; +} + +/* Signed saturating accumulate of unsigned value + * + * Op1/Rn is treated as unsigned + * Op2/Rd is treated as signed + * + * The result is treated as a signed value and saturated as such + * + * We use a macro for the 8/16 bit cases which expects signed integers of va, + * vb, and vr for interim calculation and an unsigned 32 bit result value r. + */ + +#define SSATACC(bits, shift) \ + do { \ + va = extract32(a, shift, bits); \ + vb = sextract32(b, shift, bits); \ + vr = va + vb; \ + if (vr > INT##bits##_MAX) { \ + SET_QC(); \ + vr = INT##bits##_MAX; \ + } else if (vr < INT##bits##_MIN) { \ + SET_QC(); \ + vr = INT##bits##_MIN; \ + } \ + r = deposit32(r, shift, bits, vr); \ + } while (0) + +uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int16_t va, vb, vr; + uint32_t r = 0; + + SSATACC(8, 0); + SSATACC(8, 8); + SSATACC(8, 16); + SSATACC(8, 24); + return r; +} + +uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int32_t va, vb, vr; + uint32_t r = 0; + + SSATACC(16, 0); + SSATACC(16, 16); + + return r; +} + +#undef SSATACC + +uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) +{ + int64_t res; + int64_t op1 = (uint32_t)a; + int64_t op2 = (int32_t)b; + res = op1 + op2; + if (res > INT32_MAX) { + SET_QC(); + res = INT32_MAX; + } else if (res < INT32_MIN) { + SET_QC(); + res = INT32_MIN; + } + return res; +} + +uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b) +{ + uint64_t res; + res = a + b; + /* We only need to look at the pattern of SIGN bits to detect an overflow */ + if (((a & res) + | (~b & res) + | (a & ~b)) & SIGNBIT64) { + SET_QC(); + res = INT64_MAX; + } + return res; +} + + #define NEON_USAT(dest, src1, src2, type) do { \ uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -1776,6 +1941,28 @@ uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x) return x; } +uint64_t HELPER(neon_qabs_s64)(CPUARMState *env, uint64_t x) +{ + if (x == SIGNBIT64) { + SET_QC(); + x = ~SIGNBIT64; + } else if ((int64_t)x < 0) { + x = -x; + } + return x; +} + +uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x) +{ + if (x == 
SIGNBIT64) { + SET_QC(); + x = ~SIGNBIT64; + } else { + x = -x; + } + return x; +} + /* NEON Float helpers. */ uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp) { diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 7d06d2f9a5..21ff58e754 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -24,8 +24,11 @@ static void raise_exception(CPUARMState *env, int tt) { - env->exception_index = tt; - cpu_loop_exit(env); + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); + + cs->exception_index = tt; + cpu_loop_exit(cs); } uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, @@ -69,20 +72,24 @@ uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, #include "exec/softmmu_template.h" /* try to fill the TLB and return an exception if error. If retaddr is - NULL, it means that the function was called in C code (i.e. not - from generated code or from helper.c) */ -void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx, + * NULL, it means that the function was called in C code (i.e. not + * from generated code or from helper.c) + */ +void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { int ret; - ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx); + ret = arm_cpu_handle_mmu_fault(cs, addr, is_write, mmu_idx); if (unlikely(ret)) { + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + if (retaddr) { /* now we have a real cpu fault */ - cpu_restore_state(env, retaddr); + cpu_restore_state(cs, retaddr); } - raise_exception(env, env->exception_index); + raise_exception(env, cs->exception_index); } } #endif @@ -220,15 +227,28 @@ void HELPER(wfi)(CPUARMState *env) { CPUState *cs = CPU(arm_env_get_cpu(env)); - env->exception_index = EXCP_HLT; + cs->exception_index = EXCP_HLT; cs->halted = 1; - cpu_loop_exit(env); + cpu_loop_exit(cs); +} + +void HELPER(wfe)(CPUARMState *env) +{ + CPUState *cs = CPU(arm_env_get_cpu(env)); + + /* Don't actually halt the CPU, just yield back to top + * level loop + */ + cs->exception_index = EXCP_YIELD; + cpu_loop_exit(cs); } void HELPER(exception)(CPUARMState *env, uint32_t excp) { - env->exception_index = excp; - cpu_loop_exit(env); + CPUState *cs = CPU(arm_env_get_cpu(env)); + + cs->exception_index = excp; + cpu_loop_exit(cs); } uint32_t HELPER(cpsr_read)(CPUARMState *env) diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 08ac6591b6..9f0645075e 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -73,14 +73,17 @@ typedef struct AArch64DecodeTable { } AArch64DecodeTable; /* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32); typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); /* initialize TCG globals. 
*/ void a64_translate_init(void) @@ -210,7 +213,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) if (use_goto_tb(s, n, dest)) { tcg_gen_goto_tb(n); gen_a64_set_pc_im(dest); - tcg_gen_exit_tb((tcg_target_long)tb + n); + tcg_gen_exit_tb((intptr_t)tb + n); s->is_jmp = DISAS_TB_JUMP; } else { gen_a64_set_pc_im(dest); @@ -3096,12 +3099,11 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) /* non-flag setting ops may use SP */ if (!setflags) { - tcg_rn = read_cpu_reg_sp(s, rn, sf); tcg_rd = cpu_reg_sp(s, rd); } else { - tcg_rn = read_cpu_reg(s, rn, sf); tcg_rd = cpu_reg(s, rd); } + tcg_rn = read_cpu_reg_sp(s, rn, sf); tcg_rm = read_cpu_reg(s, rm, sf); ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); @@ -5828,6 +5830,21 @@ static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, } } +/* SRI: shift right with insert */ +static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + int size, int shift) +{ + int esize = 8 << size; + + /* shift count same as element size is valid but does nothing; + * special case to avoid potential shift by 64. + */ + if (shift != esize) { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift); + } +} + /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ static void handle_scalar_simd_shri(DisasContext *s, bool is_u, int immh, int immb, @@ -5838,6 +5855,7 @@ static void handle_scalar_simd_shri(DisasContext *s, int shift = 2 * (8 << size) - immhb; bool accumulate = false; bool round = false; + bool insert = false; TCGv_i64 tcg_rn; TCGv_i64 tcg_rd; TCGv_i64 tcg_round; @@ -5857,6 +5875,9 @@ static void handle_scalar_simd_shri(DisasContext *s, case 0x06: /* SRSRA / URSRA (accum + rounding) */ accumulate = round = true; break; + case 0x08: /* SRI */ + insert = true; + break; } if (round) { @@ -5867,10 +5888,14 @@ static void handle_scalar_simd_shri(DisasContext *s, } tcg_rn = read_fp_dreg(s, rn); - tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + if (insert) { + handle_shri_with_ins(tcg_rd, tcg_rn, size, shift); + } else { + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + } write_fp_dreg(s, rd, tcg_rd); @@ -5908,6 +5933,374 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, tcg_temp_free_i64(tcg_rd); } +/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with + * (signed/unsigned) narrowing */ +static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, + bool is_u_shift, bool is_u_narrow, + int immh, int immb, int opcode, + int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int esize = 8 << size; + int shift = (2 * esize) - immhb; + int elements = is_scalar ? 1 : (64 / esize); + bool round = extract32(opcode, 0, 1); + TCGMemOp ldop = (size + 1) | (is_u_shift ? 
0 : MO_SIGN); + TCGv_i64 tcg_rn, tcg_rd, tcg_round; + TCGv_i32 tcg_rd_narrowed; + TCGv_i64 tcg_final; + + static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_unarrow_sat8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_unarrow_sat16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_unarrow_sat32 }, + { NULL, NULL }, + }; + static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { + gen_helper_neon_narrow_sat_u8, + gen_helper_neon_narrow_sat_u16, + gen_helper_neon_narrow_sat_u32, + NULL + }; + NeonGenNarrowEnvFn *narrowfn; + + int i; + + assert(size < 4); + + if (extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + if (is_u_shift) { + narrowfn = unsigned_narrow_fns[size]; + } else { + narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0]; + } + + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_rd_narrowed = tcg_temp_new_i32(); + tcg_final = tcg_const_i64(0); + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, ldop); + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + false, is_u_shift, size+1, shift); + narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); + tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } + + if (!is_q) { + clear_vec_high(s, rd); + write_vec_element(s, tcg_final, rd, 0, MO_64); + } else { + write_vec_element(s, tcg_final, rd, 1, MO_64); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i32(tcg_rd_narrowed); + tcg_temp_free_i64(tcg_final); + return; +} + +/* SQSHLU, UQSHL, SQSHL: saturating left shifts */ +static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, + bool src_unsigned, bool dst_unsigned, + int immh, int immb, int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int shift = immhb - (8 << size); + int pass; + + assert(immh != 0); + assert(!(scalar && is_q)); + + if (!scalar) { + if (!is_q && extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + /* Since we use the variable-shift helpers we must + * replicate the shift count into each element of + * the tcg_shift value. + */ + switch (size) { + case 0: + shift |= shift << 8; + /* fall through */ + case 1: + shift |= shift << 16; + break; + case 2: + case 3: + break; + default: + g_assert_not_reached(); + } + } + + if (size == 3) { + TCGv_i64 tcg_shift = tcg_const_i64(shift); + static NeonGenTwo64OpEnvFn * const fns[2][2] = { + { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, + { NULL, gen_helper_neon_qshl_u64 }, + }; + NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; + int maxpass = is_q ? 
2 : 1; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + write_vec_element(s, tcg_op, rd, pass, MO_64); + + tcg_temp_free_i64(tcg_op); + } + tcg_temp_free_i64(tcg_shift); + + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + TCGv_i32 tcg_shift = tcg_const_i32(shift); + static NeonGenTwoOpEnvFn * const fns[2][2][3] = { + { + { gen_helper_neon_qshl_s8, + gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32 }, + { gen_helper_neon_qshlu_s8, + gen_helper_neon_qshlu_s16, + gen_helper_neon_qshlu_s32 } + }, { + { NULL, NULL, NULL }, + { gen_helper_neon_qshl_u8, + gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32 } + } + }; + NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; + TCGMemOp memop = scalar ? size : MO_32; + int maxpass = scalar ? 1 : is_q ? 4 : 2; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, memop); + genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + if (scalar) { + switch (size) { + case 0: + tcg_gen_ext8u_i32(tcg_op, tcg_op); + break; + case 1: + tcg_gen_ext16u_i32(tcg_op, tcg_op); + break; + case 2: + break; + default: + g_assert_not_reached(); + } + write_fp_sreg(s, rd, tcg_op); + } else { + write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_op); + } + tcg_temp_free_i32(tcg_shift); + + if (!is_q && !scalar) { + clear_vec_high(s, rd); + } + } +} + +/* Common vector code for handling integer to FP conversion */ +static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, + int elements, int is_signed, + int fracbits, int size) +{ + bool is_double = size == 3 ? true : false; + TCGv_ptr tcg_fpst = get_fpstatus_ptr(); + TCGv_i32 tcg_shift = tcg_const_i32(fracbits); + TCGv_i64 tcg_int = tcg_temp_new_i64(); + TCGMemOp mop = size | (is_signed ? MO_SIGN : 0); + int pass; + + for (pass = 0; pass < elements; pass++) { + read_vec_element(s, tcg_int, rn, pass, mop); + + if (is_double) { + TCGv_i64 tcg_double = tcg_temp_new_i64(); + if (is_signed) { + gen_helper_vfp_sqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpst); + } else { + gen_helper_vfp_uqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpst); + } + if (elements == 1) { + write_fp_dreg(s, rd, tcg_double); + } else { + write_vec_element(s, tcg_double, rd, pass, MO_64); + } + tcg_temp_free_i64(tcg_double); + } else { + TCGv_i32 tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpst); + } else { + gen_helper_vfp_uqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpst); + } + if (elements == 1) { + write_fp_sreg(s, rd, tcg_single); + } else { + write_vec_element_i32(s, tcg_single, rd, pass, MO_32); + } + tcg_temp_free_i32(tcg_single); + } + } + + if (!is_double && elements == 2) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_int); + tcg_temp_free_ptr(tcg_fpst); + tcg_temp_free_i32(tcg_shift); +} + +/* UCVTF/SCVTF - Integer to FP conversion */ +static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, + bool is_q, bool is_u, + int immh, int immb, int opcode, + int rn, int rd) +{ + bool is_double = extract32(immh, 3, 1); + int size = is_double ? MO_64 : MO_32; + int elements; + int immhb = immh << 3 | immb; + int fracbits = (is_double ? 128 : 64) - immhb; + + if (!extract32(immh, 2, 2)) { + unallocated_encoding(s); + return; + } + + if (is_scalar) { + elements = 1; + } else { + elements = is_double ? 
2 : is_q ? 4 : 2; + if (is_double && !is_q) { + unallocated_encoding(s); + return; + } + } + /* immh == 0 would be a failure of the decode logic */ + g_assert(immh); + + handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); +} + +/* FCVTZS, FCVTZU - FP to fixed-point conversion */ +static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, + bool is_q, bool is_u, + int immh, int immb, int rn, int rd) +{ + bool is_double = extract32(immh, 3, 1); + int immhb = immh << 3 | immb; + int fracbits = (is_double ? 128 : 64) - immhb; + int pass; + TCGv_ptr tcg_fpstatus; + TCGv_i32 tcg_rmode, tcg_shift; + + if (!extract32(immh, 2, 2)) { + unallocated_encoding(s); + return; + } + + if (!is_scalar && !is_q && is_double) { + unallocated_encoding(s); + return; + } + + assert(!(is_scalar && is_q)); + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_fpstatus = get_fpstatus_ptr(); + tcg_shift = tcg_const_i32(fracbits); + + if (is_double) { + int maxpass = is_scalar ? 1 : is_q ? 2 : 1; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + if (is_u) { + gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } + write_vec_element(s, tcg_op, rd, pass, MO_64); + tcg_temp_free_i64(tcg_op); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + int maxpass = is_scalar ? 1 : is_q ? 4 : 2; + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + if (is_u) { + gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } + if (is_scalar) { + write_fp_sreg(s, rd, tcg_op); + } else { + write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + } + tcg_temp_free_i32(tcg_op); + } + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + + tcg_temp_free_ptr(tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); +} + /* C3.6.9 AdvSIMD scalar shift by immediate * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 * +-----+---+-------------+------+------+--------+---+------+------+ @@ -5925,7 +6318,18 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) int immh = extract32(insn, 19, 4); bool is_u = extract32(insn, 29, 1); + if (immh == 0) { + unallocated_encoding(s); + return; + } + switch (opcode) { + case 0x08: /* SRI */ + if (!is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ case 0x00: /* SSHR / USHR */ case 0x02: /* SSRA / USRA */ case 0x04: /* SRSHR / URSHR */ @@ -5935,8 +6339,39 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) case 0x0a: /* SHL / SLI */ handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); break; + case 0x1c: /* SCVTF, UCVTF */ + handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, + opcode, rn, rd); + break; + case 0x10: /* SQSHRUN, SQSHRUN2 */ + case 0x11: /* SQRSHRUN, SQRSHRUN2 */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_vec_simd_sqshrn(s, true, false, false, true, + immh, immb, opcode, rn, rd); + break; + case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ + case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ + handle_vec_simd_sqshrn(s, true, false, is_u, is_u, + immh, immb, opcode, rn, rd); + 
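/* For intuition: what the SQSHRN/SQRSHRN dispatch above computes per
 * element, in the 32->16 case, is (an illustrative C sketch, not code
 * from this patch; QC flag setting and lane packing are omitted):
 *
 *   int64_t v = (int64_t)src32 + (1LL << (shift - 1));   rounding add
 *   v >>= shift;                                         arithmetic shift
 *   dst16 = v > INT16_MAX ? INT16_MAX
 *         : v < INT16_MIN ? INT16_MIN : (int16_t)v;      signed saturate
 *
 * handle_shri_with_rndacc supplies the first two steps and the
 * neon_narrow_sat_* helpers the saturation.
 */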
break; + case 0xc: /* SQSHLU */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); + break; + case 0xe: /* SQSHL, UQSHL */ + handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); + break; + case 0x1f: /* FCVTZS, FCVTZU */ + handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); + break; default: - unsupported_encoding(s, insn); + unallocated_encoding(s); break; } } @@ -6483,21 +6918,38 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) } static void handle_2misc_64(DisasContext *s, int opcode, bool u, - TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, + TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) { /* Handle 64->64 opcodes which are shared between the scalar and * vector 2-reg-misc groups. We cover every integer opcode where size == 3 * is valid in either group and also the double-precision fp ops. + * The caller need only provide tcg_rmode and tcg_fpstatus if the op + * requires them. */ TCGCond cond; switch (opcode) { + case 0x4: /* CLS, CLZ */ + if (u) { + gen_helper_clz64(tcg_rd, tcg_rn); + } else { + gen_helper_cls64(tcg_rd, tcg_rn); + } + break; case 0x5: /* NOT */ /* This opcode is shared with CNT and RBIT but we have earlier * enforced that size == 3 if and only if this is the NOT insn. */ tcg_gen_not_i64(tcg_rd, tcg_rn); break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn); + } else { + gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn); + } + break; case 0xa: /* CMLT */ /* 64 bit integer comparison against zero, result is * test ? (2^64 - 1) : 0. We implement via setcond(!test) and @@ -6531,6 +6983,42 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, case 0x6f: /* FNEG */ gen_helper_vfp_negd(tcg_rd, tcg_rn); break; + case 0x7f: /* FSQRT */ + gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x58: /* FRINTA */ + case 0x79: /* FRINTI */ + gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); + break; + case 0x59: /* FRINTX */ + gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); + break; default: g_assert_not_reached(); } @@ -6645,6 +7133,289 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, tcg_temp_free_ptr(fpst); } +static void handle_2misc_reciprocal(DisasContext *s, int opcode, + bool is_scalar, bool is_u, bool is_q, + int size, int rn, int rd) +{ + bool is_double = (size == 3); + TCGv_ptr fpst = get_fpstatus_ptr(); + + if (is_double) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { + read_vec_element(s, tcg_op, rn, pass, MO_64); + switch (opcode) { + case 0x3d: /* FRECPE */ + gen_helper_recpe_f64(tcg_res, tcg_op, fpst); + break; + case 0x3f: /* FRECPX */ + gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); + break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); + break; + default: + g_assert_not_reached(); + } + write_vec_element(s, tcg_res, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op); + } else { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + + switch (opcode) { + case 0x3c: /* URECPE */ + gen_helper_recpe_u32(tcg_res, tcg_op, fpst); + break; + case 0x3d: /* FRECPE */ + gen_helper_recpe_f32(tcg_res, tcg_op, fpst); + break; + case 0x3f: /* FRECPX */ + gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); + break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + } + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op); + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + tcg_temp_free_ptr(fpst); +} + +static void handle_2misc_narrow(DisasContext *s, bool scalar, + int opcode, bool u, bool is_q, + int size, int rn, int rd) +{ + /* Handle 2-reg-misc ops which are narrowing (so each 2*size element + * in the source becomes a size element in the destination). + */ + int pass; + TCGv_i32 tcg_res[2]; + int destelt = is_q ? 2 : 0; + int passes = scalar ? 
1 : 2; + + if (scalar) { + tcg_res[1] = tcg_const_i32(0); + } + + for (pass = 0; pass < passes; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenNarrowFn *genfn = NULL; + NeonGenNarrowEnvFn *genenvfn = NULL; + + if (scalar) { + read_vec_element(s, tcg_op, rn, pass, size + 1); + } else { + read_vec_element(s, tcg_op, rn, pass, MO_64); + } + tcg_res[pass] = tcg_temp_new_i32(); + + switch (opcode) { + case 0x12: /* XTN, SQXTUN */ + { + static NeonGenNarrowFn * const xtnfns[3] = { + gen_helper_neon_narrow_u8, + gen_helper_neon_narrow_u16, + tcg_gen_trunc_i64_i32, + }; + static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + gen_helper_neon_unarrow_sat8, + gen_helper_neon_unarrow_sat16, + gen_helper_neon_unarrow_sat32, + }; + if (u) { + genenvfn = sqxtunfns[size]; + } else { + genfn = xtnfns[size]; + } + break; + } + case 0x14: /* SQXTN, UQXTN */ + { + static NeonGenNarrowEnvFn * const fns[3][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_narrow_sat_u8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x16: /* FCVTN, FCVTN2 */ + /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ + if (size == 2) { + gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); + } else { + TCGv_i32 tcg_lo = tcg_temp_new_i32(); + TCGv_i32 tcg_hi = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tcg_lo, tcg_op); + gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env); + tcg_gen_shri_i64(tcg_op, tcg_op, 32); + tcg_gen_trunc_i64_i32(tcg_hi, tcg_op); + gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env); + tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); + tcg_temp_free_i32(tcg_lo); + tcg_temp_free_i32(tcg_hi); + } + break; + case 0x56: /* FCVTXN, FCVTXN2 */ + /* 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) + */ + assert(size == 2); + gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); + break; + default: + g_assert_not_reached(); + } + + if (genfn) { + genfn(tcg_res[pass], tcg_op); + } else if (genenvfn) { + genenvfn(tcg_res[pass], cpu_env, tcg_op); + } + + tcg_temp_free_i64(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* Remaining saturating accumulating ops */ +static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, + bool is_q, int size, int rn, int rd) +{ + bool is_double = (size == 3); + + if (is_double) { + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_rn, rn, pass, MO_64); + read_vec_element(s, tcg_rd, rd, pass, MO_64); + + if (is_u) { /* USQADD */ + gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } else { /* SUQADD */ + gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } + write_vec_element(s, tcg_rd, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } else { + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 
4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + if (is_scalar) { + read_vec_element_i32(s, tcg_rn, rn, pass, size); + read_vec_element_i32(s, tcg_rd, rd, pass, size); + } else { + read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); + read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (is_u) { /* USQADD */ + switch (size) { + case 0: + gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } else { /* SUQADD */ + switch (size) { + case 0: + gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } + + if (is_scalar) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + write_vec_element(s, tcg_zero, rd, 0, MO_64); + tcg_temp_free_i64(tcg_zero); + } + write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (!is_q) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } +} + /* C3.6.12 AdvSIMD scalar two reg misc * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 * +-----+---+-----------+------+-----------+--------+-----+------+------+ @@ -6658,8 +7429,17 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 12, 5); int size = extract32(insn, 22, 2); bool u = extract32(insn, 29, 1); + bool is_fcvt = false; + int rmode; + TCGv_i32 tcg_rmode; + TCGv_ptr tcg_fpstatus; switch (opcode) { + case 0x3: /* USQADD / SUQADD */ + handle_2misc_satacc(s, true, u, false, size, rn, rd); + return; + case 0x7: /* SQABS / SQNEG */ + break; case 0xa: /* CMLT */ if (u) { unallocated_encoding(s); @@ -6674,6 +7454,19 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x12: /* SQXTUN */ + if (u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x14: /* SQXTN, UQXTN */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); + return; case 0xc ... 0xf: case 0x16 ... 
0x1d: case 0x1f: @@ -6690,23 +7483,41 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x6d: /* FCMLE (zero) */ handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); return; + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + bool is_signed = (opcode == 0x1d); + handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); + return; + } + case 0x3d: /* FRECPE */ + case 0x3f: /* FRECPX */ + case 0x7d: /* FRSQRTE */ + handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); + return; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x1d: /* SCVTF */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - case 0x3d: /* FRECPE */ - case 0x3f: /* FRECPX */ - case 0x56: /* FCVTXN, FCVTXN2 */ case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x5d: /* UCVTF */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - case 0x7d: /* FRSQRTE */ - unsupported_encoding(s, insn); + is_fcvt = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + break; + case 0x1c: /* FCVTAS */ + case 0x5c: /* FCVTAU */ + /* TIEAWAY doesn't fit in the usual rounding mode encoding */ + is_fcvt = true; + rmode = FPROUNDING_TIEAWAY; + break; + case 0x56: /* FCVTXN, FCVTXN2 */ + if (size == 2) { + unallocated_encoding(s); + return; + } + handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); return; default: unallocated_encoding(s); @@ -6714,26 +7525,81 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } break; default: - /* Other categories of encoding in this class: - * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64 - * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2: - * narrowing saturate ops: size 64/32/16 -> 32/16/8 - */ - unsupported_encoding(s, insn); + unallocated_encoding(s); return; } + if (is_fcvt) { + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_fpstatus = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_I32(tcg_rmode); + TCGV_UNUSED_PTR(tcg_fpstatus); + } + if (size == 3) { TCGv_i64 tcg_rn = read_fp_dreg(s, rn); TCGv_i64 tcg_rd = tcg_temp_new_i64(); - handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn); + handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); write_fp_dreg(s, rd, tcg_rd); tcg_temp_free_i64(tcg_rd); tcg_temp_free_i64(tcg_rn); } else { - /* the 'size might not be 64' ops aren't implemented yet */ - g_assert_not_reached(); + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_rn, rn, 0, size); + + switch (opcode) { + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + }; + genfn = fns[size][u]; + genfn(tcg_rd, cpu_env, tcg_rn); + break; + } + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + 
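/* All ten FCVT* opcodes above funnel into the same two softfloat
 * conversion helpers (vfp_tosls/vfp_touls) with a zero shift; what
 * distinguishes FCVTN/FCVTM/FCVTP/FCVTZ/FCVTA is only the rounding
 * mode, which the is_fcvt prologue installed via gen_helper_set_rmode
 * and the epilogue below restores.
 */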
tcg_temp_free_i32(tcg_shift); + break; + } + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_rd); + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } + + if (is_fcvt) { + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(tcg_fpstatus); } } @@ -6746,6 +7612,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int shift = 2 * (8 << size) - immhb; bool accumulate = false; bool round = false; + bool insert = false; int dsize = is_q ? 128 : 64; int esize = 8 << size; int elements = dsize/esize; @@ -6775,6 +7642,9 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, case 0x06: /* SRSRA / URSRA (accum + rounding) */ accumulate = round = true; break; + case 0x08: /* SRI */ + insert = true; + break; } if (round) { @@ -6786,12 +7656,16 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, for (i = 0; i < elements; i++) { read_vec_element(s, tcg_rn, rn, i, memop); - if (accumulate) { + if (accumulate || insert) { read_vec_element(s, tcg_rd, rd, i, memop); } - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + if (insert) { + handle_shri_with_ins(tcg_rd, tcg_rn, size, shift); + } else { + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + } write_vec_element(s, tcg_rd, rd, i, size); } @@ -6878,6 +7752,62 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, } } +/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ +static void handle_vec_simd_shrn(DisasContext *s, bool is_q, + int immh, int immb, int opcode, int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int dsize = 64; + int esize = 8 << size; + int elements = dsize/esize; + int shift = (2 * esize) - immhb; + bool round = extract32(opcode, 0, 1); + TCGv_i64 tcg_rn, tcg_rd, tcg_final; + TCGv_i64 tcg_round; + int i; + + if (extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_final = tcg_temp_new_i64(); + read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, size+1); + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + false, true, size+1, shift); + + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } + + if (!is_q) { + clear_vec_high(s, rd); + write_vec_element(s, tcg_final, rd, 0, MO_64); + } else { + write_vec_element(s, tcg_final, rd, 1, MO_64); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_final); + return; +} + /* C3.6.14 AdvSIMD shift by immediate * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 @@ -6896,6 +7826,12 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) bool is_q = extract32(insn, 30, 1); switch (opcode) { + case 0x08: /* SRI */ + if (!is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ case 0x00: /* SSHR / USHR */ case 0x02: /* SSRA / USRA (accumulate) */ case 0x04: /* SRSHR / URSHR (rounding) */ @@ -6905,15 +7841,42 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) case 0x0a: /* SHL / SLI */ handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; + case 0x10: /* SHRN */ + case 0x11: /* RSHRN / SQRSHRUN */ + if (is_u) { + handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, + opcode, rn, rd); + } else { + handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); + } + break; + case 0x12: /* SQSHRN / UQSHRN */ + case 0x13: /* SQRSHRN / UQRSHRN */ + handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, + opcode, rn, rd); + break; case 0x14: /* SSHLL / USHLL */ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; + case 0x1c: /* SCVTF / UCVTF */ + handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, + opcode, rn, rd); + break; + case 0xc: /* SQSHLU */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); + break; + case 0xe: /* SQSHL, UQSHL */ + handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); + break; + case 0x1f: /* FCVTZS / FCVTZU */ + handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); + return; default: - /* We don't currently implement any of the Narrow or saturating shifts; - * nor do we implement the fixed-point conversions in this - * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU). - */ - unsupported_encoding(s, insn); + unallocated_encoding(s); return; } } @@ -7124,6 +8087,10 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, tcg_passres, tcg_passres); break; + case 14: /* PMULL */ + assert(size == 0); + gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2); + break; default: g_assert_not_reached(); } @@ -7243,6 +8210,30 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, } } +static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm) +{ + /* PMULL of 64 x 64 -> 128 is an odd special case because it + * is the only three-reg-diff instruction which produces a + * 128-bit wide result from a single operation. However, since + * it's possible to calculate the two halves more or less + * separately we just use two helper calls. 
+ */ + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, is_q, MO_64); + read_vec_element(s, tcg_op2, rm, is_q, MO_64); + gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2); + write_vec_element(s, tcg_res, rd, 0, MO_64); + gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2); + write_vec_element(s, tcg_res, rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); +} + /* C3.6.15 AdvSIMD three different * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ @@ -7293,8 +8284,15 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - unsupported_encoding(s, insn); - break; + if (size == 3) { + if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) { + unallocated_encoding(s); + return; + } + handle_pmull_64(s, is_q, rd, rn, rm); + return; + } + goto is_widening; case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ @@ -7315,6 +8313,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } + is_widening: handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); break; default: @@ -7991,76 +8990,48 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) } } -static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q, - int size, int rn, int rd) +static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, + int size, int rn, int rd) { - /* Handle 2-reg-misc ops which are narrowing (so each 2*size element - * in the source becomes a size element in the destination). + /* Handle 2-reg-misc ops which are widening (so each size element + * in the source becomes a 2*size element in the destination. + * The only instruction like this is FCVTL. */ int pass; - TCGv_i32 tcg_res[2]; - int destelt = is_q ? 2 : 0; - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - NeonGenNarrowFn *genfn = NULL; - NeonGenNarrowEnvFn *genenvfn = NULL; + if (size == 3) { + /* 32 -> 64 bit fp conversion */ + TCGv_i64 tcg_res[2]; + int srcelt = is_q ? 
2 : 0; - read_vec_element(s, tcg_op, rn, pass, MO_64); - tcg_res[pass] = tcg_temp_new_i32(); + for (pass = 0; pass < 2; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + tcg_res[pass] = tcg_temp_new_i64(); - switch (opcode) { - case 0x12: /* XTN, SQXTUN */ - { - static NeonGenNarrowFn * const xtnfns[3] = { - gen_helper_neon_narrow_u8, - gen_helper_neon_narrow_u16, - tcg_gen_trunc_i64_i32, - }; - static NeonGenNarrowEnvFn * const sqxtunfns[3] = { - gen_helper_neon_unarrow_sat8, - gen_helper_neon_unarrow_sat16, - gen_helper_neon_unarrow_sat32, - }; - if (u) { - genenvfn = sqxtunfns[size]; - } else { - genfn = xtnfns[size]; - } - break; - } - case 0x14: /* SQXTN, UQXTN */ - { - static NeonGenNarrowEnvFn * const fns[3][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_narrow_sat_u8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_narrow_sat_u16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_narrow_sat_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - default: - g_assert_not_reached(); + read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); + gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); + tcg_temp_free_i32(tcg_op); } - - if (genfn) { - genfn(tcg_res[pass], tcg_op); - } else { - genenvfn(tcg_res[pass], cpu_env, tcg_op); + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); } + } else { + /* 16 -> 32 bit fp conversion */ + int srcelt = is_q ? 4 : 0; + TCGv_i32 tcg_res[4]; - tcg_temp_free_i64(tcg_op); - } + for (pass = 0; pass < 4; pass++) { + tcg_res[pass] = tcg_temp_new_i32(); - for (pass = 0; pass < 2; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); - tcg_temp_free_i32(tcg_res[pass]); - } - if (!is_q) { - clear_vec_high(s, rd); + read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], + cpu_env); + } + for (pass = 0; pass < 4; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } } } @@ -8133,6 +9104,108 @@ static void handle_rev(DisasContext *s, int opcode, bool u, } } +static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, + bool is_q, int size, int rn, int rd) +{ + /* Implement the pairwise operations from 2-misc: + * SADDLP, UADDLP, SADALP, UADALP. + * These all add pairs of elements in the input to produce a + * double-width result element in the output (possibly accumulating). + */ + bool accum = (opcode == 0x6); + int maxpass = is_q ? 2 : 1; + int pass; + TCGv_i64 tcg_res[2]; + + if (size == 2) { + /* 32 + 32 -> 64 op */ + TCGMemOp memop = size + (u ? 
0 : MO_SIGN); + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass * 2, memop); + read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); + tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); + if (accum) { + read_vec_element(s, tcg_op1, rd, pass, MO_64); + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenOneOpFn *genfn; + static NeonGenOneOpFn * const fns[2][2] = { + { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, + { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, + }; + + genfn = fns[size][u]; + + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + genfn(tcg_res[pass], tcg_op); + + if (accum) { + read_vec_element(s, tcg_op, rd, pass, MO_64); + if (size == 0) { + gen_helper_neon_addl_u16(tcg_res[pass], + tcg_res[pass], tcg_op); + } else { + gen_helper_neon_addl_u32(tcg_res[pass], + tcg_res[pass], tcg_op); + } + } + tcg_temp_free_i64(tcg_op); + } + } + if (!is_q) { + tcg_res[1] = tcg_const_i64(0); + } + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + +static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) +{ + /* Implement SHLL and SHLL2 */ + int pass; + int part = is_q ? 2 : 0; + TCGv_i64 tcg_res[2]; + + for (pass = 0; pass < 2; pass++) { + static NeonGenWidenFn * const widenfns[3] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + NeonGenWidenFn *widenfn = widenfns[size]; + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); + tcg_res[pass] = tcg_temp_new_i64(); + widenfn(tcg_res[pass], tcg_op); + tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); + + tcg_temp_free_i32(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + /* C3.6.17 AdvSIMD two reg misc * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ @@ -8147,6 +9220,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) bool is_q = extract32(insn, 30, 1); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); + bool need_fpstatus = false; + bool need_rmode = false; + int rmode = -1; + TCGv_i32 tcg_rmode; + TCGv_ptr tcg_fpstatus; switch (opcode) { case 0x0: /* REV64, REV32 */ @@ -8173,23 +9251,28 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd); + handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); return; - case 0x2: /* SADDLP, UADDLP */ case 0x4: /* CLS, CLZ */ + if (size == 3) { + unallocated_encoding(s); + return; + } + break; + case 0x2: /* SADDLP, UADDLP */ case 0x6: /* SADALP, UADALP */ if (size == 3) { unallocated_encoding(s); return; } - unsupported_encoding(s, insn); + handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); return; case 0x13: /* SHLL, SHLL2 */ if (u == 0 || size == 3) { unallocated_encoding(s); return; } - unsupported_encoding(s, insn); + handle_shll(s, is_q, size, rn, rd); return; case 0xa: /* CMLT */ if (u 
== 1) { @@ -8206,13 +9289,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x3: /* SUQADD, USQADD */ - case 0x7: /* SQABS, SQNEG */ if (size == 3 && !is_q) { unallocated_encoding(s); return; } - unsupported_encoding(s, insn); + handle_2misc_satacc(s, false, u, is_q, size, rn, rd); return; + case 0x7: /* SQABS, SQNEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; case 0xc ... 0xf: case 0x16 ... 0x1d: case 0x1f: @@ -8220,8 +9308,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) /* Floating point: U, size[1] and opcode indicate operation; * size[0] indicates single or double precision. */ + int is_double = extract32(size, 0, 1); opcode |= (extract32(size, 1, 1) << 5) | (u << 6); - size = extract32(size, 0, 1) ? 3 : 2; + size = is_double ? 3 : 2; switch (opcode) { case 0x2f: /* FABS */ case 0x6f: /* FNEG */ @@ -8230,6 +9319,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + bool is_signed = (opcode == 0x1d) ? true : false; + int elements = is_double ? 2 : is_q ? 4 : 2; + if (is_double && !is_q) { + unallocated_encoding(s); + return; + } + handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); + return; + } case 0x2c: /* FCMGT (zero) */ case 0x2d: /* FCMEQ (zero) */ case 0x2e: /* FCMLT (zero) */ @@ -8241,35 +9342,98 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); return; - case 0x16: /* FCVTN, FCVTN2 */ - case 0x17: /* FCVTL, FCVTL2 */ - case 0x18: /* FRINTN */ - case 0x19: /* FRINTM */ + case 0x7f: /* FSQRT */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x1d: /* SCVTF */ - case 0x38: /* FRINTP */ - case 0x39: /* FRINTZ */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - case 0x3c: /* URECPE */ - case 0x3d: /* FRECPE */ - case 0x56: /* FCVTXN, FCVTXN2 */ - case 0x58: /* FRINTA */ - case 0x59: /* FRINTX */ case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x5d: /* UCVTF */ - case 0x79: /* FRINTI */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - case 0x7c: /* URSQRTE */ + need_fpstatus = true; + need_rmode = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x5c: /* FCVTAU */ + case 0x1c: /* FCVTAS */ + need_fpstatus = true; + need_rmode = true; + rmode = FPROUNDING_TIEAWAY; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x3c: /* URECPE */ + if (size == 3) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x3d: /* FRECPE */ case 0x7d: /* FRSQRTE */ - case 0x7f: /* FSQRT */ - unsupported_encoding(s, insn); + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); return; + case 0x56: /* FCVTXN, FCVTXN2 */ + if (size == 2) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x16: /* FCVTN, FCVTN2 */ + /* handle_2misc_narrow does a 2*size -> size operation, but these + * instructions encode the source size rather than dest size. 
+ */ + handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); + return; + case 0x17: /* FCVTL, FCVTL2 */ + handle_2misc_widening(s, opcode, is_q, size, rn, rd); + return; + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + need_rmode = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + /* fall through */ + case 0x59: /* FRINTX */ + case 0x79: /* FRINTI */ + need_fpstatus = true; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x58: /* FRINTA */ + need_rmode = true; + rmode = FPROUNDING_TIEAWAY; + need_fpstatus = true; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x7c: /* URSQRTE */ + if (size == 3) { + unallocated_encoding(s); + return; + } + need_fpstatus = true; + break; default: unallocated_encoding(s); return; @@ -8281,6 +9445,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } + if (need_fpstatus) { + tcg_fpstatus = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(tcg_fpstatus); + } + if (need_rmode) { + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + } else { + TCGV_UNUSED_I32(tcg_rmode); + } + if (size == 3) { /* All 64-bit element operations can be shared with scalar 2misc */ int pass; @@ -8291,7 +9467,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op, rn, pass, MO_64); - handle_2misc_64(s, opcode, u, tcg_res, tcg_op); + handle_2misc_64(s, opcode, u, tcg_res, tcg_op, + tcg_rmode, tcg_fpstatus); write_vec_element(s, tcg_res, rd, pass, MO_64); @@ -8327,6 +9504,20 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x9: /* CMEQ, CMLE */ cond = u ? 
TCG_COND_LE : TCG_COND_EQ; goto do_cmop; + case 0x4: /* CLS */ + if (u) { + gen_helper_clz32(tcg_res, tcg_op); + } else { + gen_helper_cls32(tcg_res, tcg_op); + } + break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); + } else { + gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); + } + break; case 0xb: /* ABS, NEG */ if (u) { tcg_gen_neg_i32(tcg_res, tcg_op); @@ -8344,6 +9535,47 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x6f: /* FNEG */ gen_helper_vfp_negs(tcg_res, tcg_op); break; + case 0x7f: /* FSQRT */ + gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosls(tcg_res, tcg_op, + tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touls(tcg_res, tcg_op, + tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x58: /* FRINTA */ + case 0x79: /* FRINTI */ + gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); + break; + case 0x59: /* FRINTX */ + gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); + break; + case 0x7c: /* URSQRTE */ + gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus); + break; default: g_assert_not_reached(); } @@ -8360,6 +9592,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_neon_cnt_u8(tcg_res, tcg_op); } break; + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + }; + genfn = fns[size][u]; + genfn(tcg_res, cpu_env, tcg_op); + break; + } case 0x8: /* CMGT, CMGE */ case 0x9: /* CMEQ, CMLE */ case 0xa: /* CMLT */ @@ -8407,6 +9650,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } } break; + case 0x4: /* CLS, CLZ */ + if (u) { + if (size == 0) { + gen_helper_neon_clz_u8(tcg_res, tcg_op); + } else { + gen_helper_neon_clz_u16(tcg_res, tcg_op); + } + } else { + if (size == 0) { + gen_helper_neon_cls_s8(tcg_res, tcg_op); + } else { + gen_helper_neon_cls_s16(tcg_res, tcg_op); + } + } + break; default: g_assert_not_reached(); } @@ -8421,6 +9679,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) if (!is_q) { clear_vec_high(s, rd); } + + if (need_rmode) { + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + } + if (need_fpstatus) { + tcg_temp_free_ptr(tcg_fpstatus); + } } /* C3.6.13 AdvSIMD scalar x indexed element @@ -9045,6 +10311,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, dc->vec_stride = 0; dc->cp_regs = cpu->cp_regs; dc->current_pl = arm_current_pl(env); + dc->features = env->features; init_tmp_a64_array(dc); @@ -9061,8 +10328,8 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, tcg_clear_temp_count(); do { - if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { - QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) { + QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { if (bp->pc == dc->pc) { gen_exception_insn(dc, 0, 
EXCP_DEBUG); /* Advance PC so that clearing the breakpoint will diff --git a/target-arm/translate.c b/target-arm/translate.c index 253d2a13eb..56e3b4bf7f 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -3939,6 +3939,9 @@ static void gen_nop_hint(DisasContext *s, int val) s->is_jmp = DISAS_WFI; break; case 2: /* wfe */ + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_WFE; + break; case 4: /* sev */ case 5: /* sevl */ /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */ @@ -6679,17 +6682,33 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins break; } case NEON_2RM_VRECPE: - gen_helper_recpe_u32(tmp, tmp, cpu_env); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_recpe_u32(tmp, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case NEON_2RM_VRSQRTE: - gen_helper_rsqrte_u32(tmp, tmp, cpu_env); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rsqrte_u32(tmp, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case NEON_2RM_VRECPE_F: - gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case NEON_2RM_VRSQRTE_F: - gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ gen_vfp_sito(0, 1); break; @@ -10651,6 +10670,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags); dc->cp_regs = cpu->cp_regs; dc->current_pl = arm_current_pl(env); + dc->features = env->features; cpu_F0s = tcg_temp_new_i32(); cpu_F1s = tcg_temp_new_i32(); @@ -10730,8 +10750,8 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, } #endif - if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { - QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) { + QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { if (bp->pc == dc->pc) { gen_exception_insn(dc, 0, EXCP_DEBUG); /* Advance PC so that clearing the breakpoint will @@ -10800,7 +10820,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, if (dc->condjmp) { /* FIXME: This can theoretically happen with self-modifying code. */ - cpu_abort(env, "IO on conditional branch instruction"); + cpu_abort(cs, "IO on conditional branch instruction"); } gen_io_end(); } @@ -10857,6 +10877,9 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu, case DISAS_WFI: gen_helper_wfi(cpu_env); break; + case DISAS_WFE: + gen_helper_wfe(cpu_env); + break; case DISAS_SWI: gen_exception(EXCP_SWI); break; diff --git a/target-arm/translate.h b/target-arm/translate.h index 67da6996c9..3525ffcecb 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -26,6 +26,7 @@ typedef struct DisasContext { int aarch64; int current_pl; GHashTable *cp_regs; + uint64_t features; /* CPU features bits */ #define TMP_A64_MAX 16 int tmp_a64_count; TCGv_i64 tmp_a64[TMP_A64_MAX]; @@ -33,6 +34,11 @@ typedef struct DisasContext { extern TCGv_ptr cpu_env; +static inline int arm_dc_feature(DisasContext *dc, int feature) +{ + return (dc->features & (1ULL << feature)) != 0; +} + /* target-specific extra values for is_jmp */ /* These instructions trap after executing, so the A32/T32 decoder must * defer them until after the conditional execution state has been updated. 
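The arm_dc_feature() helper added just above (and used earlier in this patch to gate PMULL on ARM_FEATURE_V8_AES) is why both translators now snapshot env->features into the DisasContext: a decode-time feature test becomes a plain bit test on translator-local state, with no CPU env access. A minimal standalone illustration of the pattern (the Ctx type and bit index are invented for this example, not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Feature bits are latched into the translation context once per TB,
 * so a per-insn gate is a simple bit test with no CPU-state access.
 */
typedef struct {
    uint64_t features;            /* one bit per optional CPU feature */
} Ctx;

enum { FEATURE_AES = 3 };         /* hypothetical bit index */

static int ctx_feature(const Ctx *c, int feature)
{
    return (c->features & (1ULL << feature)) != 0;
}

int main(void)
{
    Ctx dc = { .features = 1ULL << FEATURE_AES };

    printf("AES-gated insn allowed: %d\n", ctx_feature(&dc, FEATURE_AES));
    printf("other feature allowed:  %d\n", ctx_feature(&dc, 0));
    return 0;
}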
@@ -44,6 +50,8 @@ extern TCGv_ptr cpu_env; * emitting unreachable code at the end of the TB in the A64 decoder */ #define DISAS_EXC 6 +/* WFE */ +#define DISAS_WFE 7 #ifdef TARGET_AARCH64 void a64_translate_init(void);
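One behavioural point worth drawing out of the hunks above: WFE is deliberately not a halt. The decoder defers it to the TB epilogue via DISAS_WFE, and the wfe helper raises EXCP_YIELD without setting cs->halted, whereas WFI raises EXCP_HLT and does halt. A standalone sketch of that distinction (the vcpu struct and exit handler here are simplified stand-ins for QEMU's main loop, not its real code):

#include <stdbool.h>
#include <stdio.h>

/* Simplified model: EXCP_HLT (WFI) parks the vCPU until an interrupt,
 * while EXCP_YIELD (WFE) leaves it runnable and just reschedules.
 */
enum { EXCP_NONE, EXCP_HLT, EXCP_YIELD };

struct vcpu {
    bool halted;
    int exception_index;
};

static void handle_exit(struct vcpu *cpu)
{
    switch (cpu->exception_index) {
    case EXCP_HLT:     /* WFI: sleep until an interrupt arrives */
        cpu->halted = true;
        break;
    case EXCP_YIELD:   /* WFE: stay runnable, let another vCPU run */
        break;
    }
    cpu->exception_index = EXCP_NONE;
}

int main(void)
{
    struct vcpu cpu = { false, EXCP_YIELD };

    handle_exit(&cpu);
    printf("halted after WFE: %d\n", cpu.halted);

    cpu.exception_index = EXCP_HLT;
    handle_exit(&cpu);
    printf("halted after WFI: %d\n", cpu.halted);
    return 0;
}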