Diffstat (limited to 'target/arm/kvm64.c')
| -rw-r--r-- | target/arm/kvm64.c | 323 |
1 file changed, 291 insertions(+), 32 deletions(-)
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 28f6db57d5..876184b8fe 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -488,7 +488,9 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
      * and then query that CPU for the relevant ID registers.
      */
     int fdarray[3];
+    bool sve_supported;
     uint64_t features = 0;
+    uint64_t t;
     int err;
 
     /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
@@ -502,7 +504,11 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
         KVM_ARM_TARGET_CORTEX_A57,
         QEMU_KVM_ARM_TARGET_NONE
     };
-    struct kvm_vcpu_init init;
+    /*
+     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
+     * to use the preferred target
+     */
+    struct kvm_vcpu_init init = { .target = -1, };
 
     if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
         return false;
@@ -574,13 +580,23 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
                               ARM64_SYS_REG(3, 0, 0, 3, 2));
     }
 
+    sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;
+
     kvm_arm_destroy_scratch_host_vcpu(fdarray);
 
     if (err < 0) {
         return false;
     }
 
-    /* We can assume any KVM supporting CPU is at least a v8
+    /* Add feature bits that can't appear until after VCPU init. */
+    if (sve_supported) {
+        t = ahcf->isar.id_aa64pfr0;
+        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
+        ahcf->isar.id_aa64pfr0 = t;
+    }
+
+    /*
+     * We can assume any KVM supporting CPU is at least a v8
      * with VFPv4+Neon; this in turn implies most of the other
      * feature bits.
      */
@@ -602,6 +618,107 @@ bool kvm_arm_aarch32_supported(CPUState *cpu)
     return kvm_check_extension(s, KVM_CAP_ARM_EL1_32BIT);
 }
 
+bool kvm_arm_sve_supported(CPUState *cpu)
+{
+    KVMState *s = KVM_STATE(current_machine->accelerator);
+
+    return kvm_check_extension(s, KVM_CAP_ARM_SVE);
+}
+
+QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
+
+void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
+{
+    /* Only call this function if kvm_arm_sve_supported() returns true. */
+    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
+    static bool probed;
+    uint32_t vq = 0;
+    int i, j;
+
+    bitmap_clear(map, 0, ARM_MAX_VQ);
+
+    /*
+     * KVM ensures all host CPUs support the same set of vector lengths.
+     * So we only need to create the scratch VCPUs once and then cache
+     * the results.
+     */
+    if (!probed) {
+        struct kvm_vcpu_init init = {
+            .target = -1,
+            .features[0] = (1 << KVM_ARM_VCPU_SVE),
+        };
+        struct kvm_one_reg reg = {
+            .id = KVM_REG_ARM64_SVE_VLS,
+            .addr = (uint64_t)&vls[0],
+        };
+        int fdarray[3], ret;
+
+        probed = true;
+
+        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
+            error_report("failed to create scratch VCPU with SVE enabled");
+            abort();
+        }
+        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
+        kvm_arm_destroy_scratch_host_vcpu(fdarray);
+        if (ret) {
+            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
+                         strerror(errno));
+            abort();
+        }
+
+        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
+            if (vls[i]) {
+                vq = 64 - clz64(vls[i]) + i * 64;
+                break;
+            }
+        }
+        if (vq > ARM_MAX_VQ) {
+            warn_report("KVM supports vector lengths larger than "
+                        "QEMU can enable");
+        }
+    }
+
+    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
+        if (!vls[i]) {
+            continue;
+        }
+        for (j = 1; j <= 64; ++j) {
+            vq = j + i * 64;
+            if (vq > ARM_MAX_VQ) {
+                return;
+            }
+            if (vls[i] & (1UL << (j - 1))) {
+                set_bit(vq - 1, map);
+            }
+        }
+    }
+}
+
+static int kvm_arm_sve_set_vls(CPUState *cs)
+{
+    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
+    struct kvm_one_reg reg = {
+        .id = KVM_REG_ARM64_SVE_VLS,
+        .addr = (uint64_t)&vls[0],
+    };
+    ARMCPU *cpu = ARM_CPU(cs);
+    uint32_t vq;
+    int i, j;
+
+    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
+
+    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
+        if (test_bit(vq - 1, cpu->sve_vq_map)) {
+            i = (vq - 1) / 64;
+            j = (vq - 1) % 64;
+            vls[i] |= 1UL << j;
+        }
+    }
+
+    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+}
+
 #define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
 
 int kvm_arch_init_vcpu(CPUState *cs)
@@ -613,7 +730,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
         !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
-        fprintf(stderr, "KVM is not supported for this guest CPU type\n");
+        error_report("KVM is not supported for this guest CPU type");
         return -EINVAL;
     }
 
@@ -630,13 +747,17 @@ int kvm_arch_init_vcpu(CPUState *cs)
         cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
     }
     if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
-            cpu->has_pmu = false;
+        cpu->has_pmu = false;
     }
     if (cpu->has_pmu) {
         cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
     } else {
         unset_feature(&env->features, ARM_FEATURE_PMU);
     }
+    if (cpu_isar_feature(aa64_sve, cpu)) {
+        assert(kvm_arm_sve_supported(cs));
+        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
+    }
 
     /* Do KVM_ARM_VCPU_INIT ioctl */
     ret = kvm_arm_vcpu_init(cs);
@@ -644,6 +765,17 @@ int kvm_arch_init_vcpu(CPUState *cs)
         return ret;
     }
 
+    if (cpu_isar_feature(aa64_sve, cpu)) {
+        ret = kvm_arm_sve_set_vls(cs);
+        if (ret) {
+            return ret;
+        }
+        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
+        if (ret) {
+            return ret;
+        }
+    }
+
     /*
      * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
      * Currently KVM has its own idea about MPIDR assignment, so we
@@ -671,11 +803,12 @@ int kvm_arch_destroy_vcpu(CPUState *cs)
 bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
 {
     /* Return true if the regidx is a register we should synchronize
-     * via the cpreg_tuples array (ie is not a core reg we sync by
-     * hand in kvm_arch_get/put_registers())
+     * via the cpreg_tuples array (ie is not a core or sve reg that
+     * we sync by hand in kvm_arch_get/put_registers())
      */
     switch (regidx & KVM_REG_ARM_COPROC_MASK) {
     case KVM_REG_ARM_CORE:
+    case KVM_REG_ARM64_SVE:
         return false;
     default:
         return true;
@@ -721,10 +854,8 @@ int kvm_arm_cpreg_level(uint64_t regidx)
 
 static int kvm_arch_put_fpsimd(CPUState *cs)
 {
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
+    CPUARMState *env = &ARM_CPU(cs)->env;
     struct kvm_one_reg reg;
-    uint32_t fpr;
     int i, ret;
 
     for (i = 0; i < 32; i++) {
@@ -742,17 +873,73 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
         }
     }
 
-    reg.addr = (uintptr_t)(&fpr);
-    fpr = vfp_get_fpsr(env);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
+    return 0;
+}
+
+/*
+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
+ * The byte at offset i from the start of the in-memory representation contains
+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
+ * lowest offsets are stored in the lowest memory addresses, then that nearly
+ * matches QEMU's representation, which is to use an array of host-endian
+ * uint64_t's, where the lower offsets are at the lower indices. To complete
+ * the translation we just need to byte swap the uint64_t's on big-endian hosts.
+ */
+static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    int i;
+
+    for (i = 0; i < nr; ++i) {
+        dst[i] = bswap64(src[i]);
     }
-    reg.addr = (uintptr_t)(&fpr);
-    fpr = vfp_get_fpcr(env);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
+    return dst;
+#else
+    return src;
+#endif
+}
+
+/*
+ * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
+ * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
+ * code the slice index to zero for now as it's unlikely we'll need more than
+ * one slice for quite some time.
+ */
+static int kvm_arch_put_sve(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    uint64_t tmp[ARM_MAX_VQ * 2];
+    uint64_t *r;
+    struct kvm_one_reg reg;
+    int n, ret;
+
+    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
+        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
+        reg.addr = (uintptr_t)r;
+        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
+        r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0],
+                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+        reg.addr = (uintptr_t)r;
+        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
+                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+    reg.addr = (uintptr_t)r;
+    reg.id = KVM_REG_ARM64_SVE_FFR(0);
     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret) {
         return ret;
@@ -765,6 +952,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 {
     struct kvm_one_reg reg;
     uint64_t val;
+    uint32_t fpr;
     int i, ret;
     unsigned int el;
 
@@ -855,7 +1043,27 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         }
     }
 
-    ret = kvm_arch_put_fpsimd(cs);
+    if (cpu_isar_feature(aa64_sve, cpu)) {
+        ret = kvm_arch_put_sve(cs);
+    } else {
+        ret = kvm_arch_put_fpsimd(cs);
+    }
+    if (ret) {
+        return ret;
+    }
+
+    reg.addr = (uintptr_t)(&fpr);
+    fpr = vfp_get_fpsr(env);
+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    reg.addr = (uintptr_t)(&fpr);
+    fpr = vfp_get_fpcr(env);
+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret) {
         return ret;
     }
@@ -878,10 +1086,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 
 static int kvm_arch_get_fpsimd(CPUState *cs)
 {
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
+    CPUARMState *env = &ARM_CPU(cs)->env;
     struct kvm_one_reg reg;
-    uint32_t fpr;
     int i, ret;
 
     for (i = 0; i < 32; i++) {
@@ -899,21 +1105,53 @@ static int kvm_arch_get_fpsimd(CPUState *cs)
         }
     }
 
-    reg.addr = (uintptr_t)(&fpr);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
+    return 0;
+}
+
+/*
+ * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
+ * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
+ * code the slice index to zero for now as it's unlikely we'll need more than
+ * one slice for quite some time.
+ */
+static int kvm_arch_get_sve(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    struct kvm_one_reg reg;
+    uint64_t *r;
+    int n, ret;
+
+    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
+        r = &env->vfp.zregs[n].d[0];
+        reg.addr = (uintptr_t)r;
+        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+        sve_bswap64(r, r, cpu->sve_max_vq * 2);
+    }
 
-    vfp_set_fpsr(env, fpr);
-    reg.addr = (uintptr_t)(&fpr);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
+    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
+        r = &env->vfp.pregs[n].p[0];
+        reg.addr = (uintptr_t)r;
+        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+    }
+
+    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
+    reg.addr = (uintptr_t)r;
+    reg.id = KVM_REG_ARM64_SVE_FFR(0);
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret) {
         return ret;
     }
-    vfp_set_fpcr(env, fpr);
+    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 
     return 0;
 }
@@ -923,6 +1161,7 @@ int kvm_arch_get_registers(CPUState *cs)
     struct kvm_one_reg reg;
     uint64_t val;
     unsigned int el;
+    uint32_t fpr;
    int i, ret;
 
     ARMCPU *cpu = ARM_CPU(cs);
@@ -1012,10 +1251,30 @@ int kvm_arch_get_registers(CPUState *cs)
         env->spsr = env->banked_spsr[i];
     }
 
-    ret = kvm_arch_get_fpsimd(cs);
+    if (cpu_isar_feature(aa64_sve, cpu)) {
+        ret = kvm_arch_get_sve(cs);
+    } else {
+        ret = kvm_arch_get_fpsimd(cs);
+    }
+    if (ret) {
+        return ret;
+    }
+
+    reg.addr = (uintptr_t)(&fpr);
+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+    vfp_set_fpsr(env, fpr);
+
+    reg.addr = (uintptr_t)(&fpr);
+    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret) {
         return ret;
     }
+    vfp_set_fpcr(env, fpr);
 
     ret = kvm_get_vcpu_events(cpu);
     if (ret) {
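
The kvm_arm_sve_get_vls() and kvm_arm_sve_set_vls() hunks above share one encoding: bit (vq - 1) of the KVM_REG_ARM64_SVE_VLS word array is set exactly when the host supports vectors of vq * 128 bits. Below is a minimal standalone sketch of just that bitmap arithmetic; SVE_VLS_WORDS is a hypothetical stand-in for the kernel's KVM_ARM64_SVE_VLS_WORDS, and the "supported" lengths are made up for the demo.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SVE_VLS_WORDS 8  /* assumption: mirrors KVM_ARM64_SVE_VLS_WORDS */

/* Mark vector length vq (in units of 128 bits) as supported. */
static void vls_set(uint64_t *vls, uint32_t vq)
{
    vls[(vq - 1) / 64] |= UINT64_C(1) << ((vq - 1) % 64);
}

/* Query whether vector length vq is supported. */
static bool vls_test(const uint64_t *vls, uint32_t vq)
{
    return vls[(vq - 1) / 64] >> ((vq - 1) % 64) & 1;
}

int main(void)
{
    uint64_t vls[SVE_VLS_WORDS] = {0};
    uint32_t vq;

    /* Pretend the host offers 128-, 256- and 512-bit vectors. */
    vls_set(vls, 1);
    vls_set(vls, 2);
    vls_set(vls, 4);

    for (vq = 1; vq <= 8; ++vq) {
        printf("vq=%u (%4u-bit): %s\n", vq, vq * 128,
               vls_test(vls, vq) ? "supported" : "absent");
    }
    return 0;
}

The patch performs the same (vq - 1) / 64 and (vq - 1) % 64 index arithmetic, but stores the result in QEMU's bitmap type via set_bit()/test_bit() rather than on raw words.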
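
The sve_bswap64() comment above pins down the KVM format byte by byte: byte i carries bits [8i+7 : 8i] of the value, which is precisely little-endian byte order, so only big-endian hosts need to swap. This standalone sketch (no QEMU or kernel headers) reassembles a word with that formula and compares it against a raw memcpy to show where the swap becomes necessary:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Eight bytes as KVM presents them, lowest offset first. */
    const uint8_t kvm_bytes[8] = {0x11, 0x22, 0x33, 0x44,
                                  0x55, 0x66, 0x77, 0x88};
    uint64_t by_formula = 0, by_memcpy;
    int i;

    for (i = 0; i < 8; i++) {
        by_formula |= (uint64_t)kvm_bytes[i] << (8 * i); /* bits [8i+7:8i] */
    }
    memcpy(&by_memcpy, kvm_bytes, sizeof(by_memcpy));

    printf("formula: 0x%016llx\n", (unsigned long long)by_formula);
    printf("memcpy:  0x%016llx (%s)\n", (unsigned long long)by_memcpy,
           by_formula == by_memcpy
           ? "little-endian host, no swap needed"
           : "big-endian host, bswap64() required");
    return 0;
}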
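
The transfer sizes used by kvm_arch_put_sve() and kvm_arch_get_sve() above fall out of the architecture: a Z register holds vq * 128 bits, i.e. vq * 2 uint64_t words, while a P register and the FFR carry one predicate bit per vector byte, i.e. DIV_ROUND_UP(vq * 2, 8) words. A small sketch that tabulates those sizes for QEMU's supported range (DIV_ROUND_UP is redefined locally so the sketch compiles on its own):

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    uint32_t vq;

    /* ARM_MAX_VQ is 16 in QEMU, i.e. up to 2048-bit vectors. */
    for (vq = 1; vq <= 16; vq *= 2) {
        printf("vq=%2u: vector=%4u bits, zreg=%2u words, preg/ffr=%u word(s)\n",
               vq, vq * 128, vq * 2, DIV_ROUND_UP(vq * 2, 8));
    }
    return 0;
}

Note that vq never exceeds one 2048-bit ZREG slice or one 256-bit PREG slice in this range, which is why the patch hard codes slice index zero.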