From e346bcbf448543829cda7e58b9183343b1f7d0e2 Mon Sep 17 00:00:00 2001 From: Yury Kotov Date: Fri, 7 Jun 2019 12:08:30 +0300 Subject: kvm-all: Add/update fprintf's for kvm_*_ioeventfd_del Signed-off-by: Yury Kotov Message-Id: <20190607090830.18807-1-yury-kotov@yandex-team.ru> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'accel/kvm/kvm-all.c') diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index b0c4bed6e3..d2f481a347 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -863,8 +863,8 @@ static void kvm_mem_ioeventfd_add(MemoryListener *listener, data, true, int128_get64(section->size), match_data); if (r < 0) { - fprintf(stderr, "%s: error adding ioeventfd: %s\n", - __func__, strerror(-r)); + fprintf(stderr, "%s: error adding ioeventfd: %s (%d)\n", + __func__, strerror(-r), -r); abort(); } } @@ -881,6 +881,8 @@ static void kvm_mem_ioeventfd_del(MemoryListener *listener, data, false, int128_get64(section->size), match_data); if (r < 0) { + fprintf(stderr, "%s: error deleting ioeventfd: %s (%d)\n", + __func__, strerror(-r), -r); abort(); } } @@ -897,8 +899,8 @@ static void kvm_io_ioeventfd_add(MemoryListener *listener, data, true, int128_get64(section->size), match_data); if (r < 0) { - fprintf(stderr, "%s: error adding ioeventfd: %s\n", - __func__, strerror(-r)); + fprintf(stderr, "%s: error adding ioeventfd: %s (%d)\n", + __func__, strerror(-r), -r); abort(); } } @@ -916,6 +918,8 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener, data, false, int128_get64(section->size), match_data); if (r < 0) { + fprintf(stderr, "%s: error deleting ioeventfd: %s (%d)\n", + __func__, strerror(-r), -r); abort(); } } -- cgit 1.4.1 From b1115c99919cf158bb859865f14c3198a0e6f679 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Wed, 19 Jun 2019 19:21:32 +0300 Subject: KVM: Introduce kvm_arch_destroy_vcpu() Simiar to how kvm_init_vcpu() calls kvm_arch_init_vcpu() to perform arch-dependent initialisation, introduce kvm_arch_destroy_vcpu() to be called from kvm_destroy_vcpu() to perform arch-dependent destruction. This was added because some architectures (Such as i386) currently do not free memory that it have allocated in kvm_arch_init_vcpu(). Suggested-by: Maran Wilson Reviewed-by: Maran Wilson Signed-off-by: Liran Alon Message-Id: <20190619162140.133674-3-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 5 +++++ include/sysemu/kvm.h | 1 + target/arm/kvm32.c | 5 +++++ target/arm/kvm64.c | 5 +++++ target/i386/kvm.c | 12 ++++++++++++ target/mips/kvm.c | 5 +++++ target/ppc/kvm.c | 5 +++++ target/s390x/kvm.c | 10 ++++++++++ 8 files changed, 48 insertions(+) (limited to 'accel/kvm/kvm-all.c') diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index d2f481a347..f0f5ab833b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -291,6 +291,11 @@ int kvm_destroy_vcpu(CPUState *cpu) DPRINTF("kvm_destroy_vcpu\n"); + ret = kvm_arch_destroy_vcpu(cpu); + if (ret < 0) { + goto err; + } + mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { ret = mmap_size; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a6d1cd190f..64f55e519d 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -371,6 +371,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level); int kvm_arch_init(MachineState *ms, KVMState *s); int kvm_arch_init_vcpu(CPUState *cpu); +int kvm_arch_destroy_vcpu(CPUState *cpu); bool kvm_vcpu_id_is_valid(int vcpu_id); diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c index 4e54e372a6..51f78f722b 100644 --- a/target/arm/kvm32.c +++ b/target/arm/kvm32.c @@ -240,6 +240,11 @@ int kvm_arch_init_vcpu(CPUState *cs) return kvm_arm_init_cpreg_list(cpu); } +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + return 0; +} + typedef struct Reg { uint64_t id; int offset; diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 998d21f399..22d19c9aec 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -654,6 +654,11 @@ int kvm_arch_init_vcpu(CPUState *cs) return kvm_arm_init_cpreg_list(cpu); } +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + return 0; +} + bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) { /* Return true if the regidx is a register we should synchronize diff --git a/target/i386/kvm.c b/target/i386/kvm.c index c5cbeaddd1..26d8c61a5f 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1679,6 +1679,18 @@ int kvm_arch_init_vcpu(CPUState *cs) return r; } +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + if (cpu->kvm_msr_buf) { + g_free(cpu->kvm_msr_buf); + cpu->kvm_msr_buf = NULL; + } + + return 0; +} + void kvm_arch_reset_vcpu(X86CPU *cpu) { CPUX86State *env = &cpu->env; diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 8e72850962..938f8f144b 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -91,6 +91,11 @@ int kvm_arch_init_vcpu(CPUState *cs) return ret; } +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + return 0; +} + void kvm_mips_reset_vcpu(MIPSCPU *cpu) { CPUMIPSState *env = &cpu->env; diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index d4107dd70d..4b4989c0af 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -521,6 +521,11 @@ int kvm_arch_init_vcpu(CPUState *cs) return ret; } +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + return 0; +} + static void kvm_sw_tlb_put(PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index bcec9795ec..0267c6c2f6 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -368,6 +368,16 @@ int kvm_arch_init_vcpu(CPUState *cs) return 0; } +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + S390CPU *cpu = S390_CPU(cs); + + g_free(cpu->irqstate); + cpu->irqstate = NULL; + + return 0; +} + void kvm_s390_reset_vcpu(S390CPU *cpu) { CPUState *cs = CPU(cpu); -- cgit 1.4.1 From ebbfef2f34cfc749c045a4569dedb4f748ec024a Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Wed, 19 Jun 2019 19:21:38 +0300 Subject: target/i386: kvm: Add support for save and restore nested state Kernel commit 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE") introduced new IOCTLs to extract and restore vCPU state related to Intel VMX & AMD SVM. Utilize these IOCTLs to add support for migration of VMs which are running nested hypervisors. Reviewed-by: Nikita Leshenko Reviewed-by: Maran Wilson Tested-by: Maran Wilson Signed-off-by: Liran Alon Message-Id: <20190619162140.133674-9-liran.alon@oracle.com> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 8 ++ include/sysemu/kvm.h | 1 + target/i386/cpu.h | 3 + target/i386/kvm.c | 80 ++++++++++++++++++++ target/i386/machine.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 290 insertions(+) (limited to 'accel/kvm/kvm-all.c') diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f0f5ab833b..e3cf72883b 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -87,6 +87,7 @@ struct KVMState #ifdef KVM_CAP_SET_GUEST_DEBUG QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; #endif + int max_nested_state_len; int many_ioeventfds; int intx_set_mask; bool sync_mmu; @@ -1681,6 +1682,8 @@ static int kvm_init(MachineState *ms) s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS); #endif + s->max_nested_state_len = kvm_check_extension(s, KVM_CAP_NESTED_STATE); + #ifdef KVM_CAP_IRQ_ROUTING kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0); #endif @@ -2248,6 +2251,11 @@ int kvm_has_debugregs(void) return kvm_state->debugregs; } +int kvm_max_nested_state_length(void) +{ + return kvm_state->max_nested_state_len; +} + int kvm_has_many_ioeventfds(void) { if (!kvm_enabled()) { diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 64f55e519d..acd90aebb6 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -210,6 +210,7 @@ bool kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_has_robust_singlestep(void); int kvm_has_debugregs(void); +int kvm_max_nested_state_length(void); int kvm_has_pit_state2(void); int kvm_has_many_ioeventfds(void); int kvm_has_gsi_routing(void); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index bf0c9c28c3..17116ef954 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1360,6 +1360,9 @@ typedef struct CPUX86State { #if defined(CONFIG_KVM) || defined(CONFIG_HVF) void *xsave_buf; #endif +#if defined(CONFIG_KVM) + struct kvm_nested_state *nested_state; +#endif #if defined(CONFIG_HVF) HVFX86EmulatorState *hvf_emul; #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index f9872f1594..e924663f32 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1324,6 +1324,7 @@ int kvm_arch_init_vcpu(CPUState *cs) struct kvm_cpuid_entry2 *c; uint32_t signature[3]; int kvm_base = KVM_CPUID_SIGNATURE; + int max_nested_state_len; int r; Error *local_err = NULL; @@ -1658,6 +1659,24 @@ int kvm_arch_init_vcpu(CPUState *cs) if (has_xsave) { env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); } + + max_nested_state_len = kvm_max_nested_state_length(); + if (max_nested_state_len > 0) { + assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); + env->nested_state = g_malloc0(max_nested_state_len); + + env->nested_state->size = max_nested_state_len; + + if (IS_INTEL_CPU(env)) { + struct kvm_vmx_nested_state_hdr *vmx_hdr = + &env->nested_state->hdr.vmx; + + env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; + vmx_hdr->vmxon_pa = -1ull; + vmx_hdr->vmcs12_pa = -1ull; + } + } + cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { @@ -1682,12 +1701,18 @@ int kvm_arch_init_vcpu(CPUState *cs) int kvm_arch_destroy_vcpu(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; if (cpu->kvm_msr_buf) { g_free(cpu->kvm_msr_buf); cpu->kvm_msr_buf = NULL; } + if (env->nested_state) { + g_free(env->nested_state); + env->nested_state = NULL; + } + return 0; } @@ -3411,6 +3436,52 @@ static int kvm_get_debugregs(X86CPU *cpu) return 0; } +static int kvm_put_nested_state(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int max_nested_state_len = kvm_max_nested_state_length(); + + if (max_nested_state_len <= 0) { + return 0; + } + + assert(env->nested_state->size <= max_nested_state_len); + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state); +} + +static int kvm_get_nested_state(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int max_nested_state_len = kvm_max_nested_state_length(); + int ret; + + if (max_nested_state_len <= 0) { + return 0; + } + + /* + * It is possible that migration restored a smaller size into + * nested_state->hdr.size than what our kernel support. + * We preserve migration origin nested_state->hdr.size for + * call to KVM_SET_NESTED_STATE but wish that our next call + * to KVM_GET_NESTED_STATE will use max size our kernel support. + */ + env->nested_state->size = max_nested_state_len; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state); + if (ret < 0) { + return ret; + } + + if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) { + env->hflags |= HF_GUEST_MASK; + } else { + env->hflags &= ~HF_GUEST_MASK; + } + + return ret; +} + int kvm_arch_put_registers(CPUState *cpu, int level) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -3418,6 +3489,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + ret = kvm_put_nested_state(x86_cpu); + if (ret < 0) { + return ret; + } + if (level >= KVM_PUT_RESET_STATE) { ret = kvm_put_msr_feature_control(x86_cpu); if (ret < 0) { @@ -3533,6 +3609,10 @@ int kvm_arch_get_registers(CPUState *cs) if (ret < 0) { goto out; } + ret = kvm_get_nested_state(cpu); + if (ret < 0) { + goto out; + } ret = 0; out: cpu_sync_bndcs_hflags(&cpu->env); diff --git a/target/i386/machine.c b/target/i386/machine.c index a39ce7feea..a6afdf8720 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -231,6 +231,15 @@ static int cpu_pre_save(void *opaque) env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK); } +#ifdef CONFIG_KVM + /* Verify we have nested virtualization state from kernel if required */ + if (cpu_has_nested_virt(env) && !env->nested_state) { + error_report("Guest enabled nested virtualization but kernel " + "does not support saving of nested state"); + return -EINVAL; + } +#endif + return 0; } @@ -278,6 +287,16 @@ static int cpu_post_load(void *opaque, int version_id) env->hflags &= ~HF_CPL_MASK; env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; +#ifdef CONFIG_KVM + if ((env->hflags & HF_GUEST_MASK) && + (!env->nested_state || + !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) { + error_report("vCPU set in guest-mode inconsistent with " + "migrated kernel nested state"); + return -EINVAL; + } +#endif + env->fpstt = (env->fpus_vmstate >> 11) & 7; env->fpus = env->fpus_vmstate & ~0x3800; env->fptag_vmstate ^= 0xff; @@ -851,6 +870,182 @@ static const VMStateDescription vmstate_tsc_khz = { } }; +#ifdef CONFIG_KVM + +static bool vmx_vmcs12_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + return (nested_state->size > + offsetof(struct kvm_nested_state, data.vmx[0].vmcs12)); +} + +static const VMStateDescription vmstate_vmx_vmcs12 = { + .name = "cpu/kvm_nested_state/vmx/vmcs12", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_vmcs12_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12, + struct kvm_nested_state, + KVM_STATE_NESTED_VMX_VMCS_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmx_shadow_vmcs12_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + return (nested_state->size > + offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12)); +} + +static const VMStateDescription vmstate_vmx_shadow_vmcs12 = { + .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_shadow_vmcs12_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12, + struct kvm_nested_state, + KVM_STATE_NESTED_VMX_VMCS_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmx_nested_state_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + + return ((nested_state->format == KVM_STATE_NESTED_FORMAT_VMX) && + ((nested_state->hdr.vmx.vmxon_pa != -1ull) || + (nested_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))); +} + +static const VMStateDescription vmstate_vmx_nested_state = { + .name = "cpu/kvm_nested_state/vmx", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_nested_state_needed, + .fields = (VMStateField[]) { + VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state), + VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state), + VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_vmx_vmcs12, + &vmstate_vmx_shadow_vmcs12, + NULL, + } +}; + +static bool svm_nested_state_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + + return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM); +} + +static const VMStateDescription vmstate_svm_nested_state = { + .name = "cpu/kvm_nested_state/svm", + .version_id = 1, + .minimum_version_id = 1, + .needed = svm_nested_state_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +static bool nested_state_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return (env->nested_state && + (vmx_nested_state_needed(env->nested_state) || + svm_nested_state_needed(env->nested_state))); +} + +static int nested_state_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + struct kvm_nested_state *nested_state = env->nested_state; + int min_nested_state_len = offsetof(struct kvm_nested_state, data); + int max_nested_state_len = kvm_max_nested_state_length(); + + /* + * If our kernel don't support setting nested state + * and we have received nested state from migration stream, + * we need to fail migration + */ + if (max_nested_state_len <= 0) { + error_report("Received nested state when kernel cannot restore it"); + return -EINVAL; + } + + /* + * Verify that the size of received nested_state struct + * at least cover required header and is not larger + * than the max size that our kernel support + */ + if (nested_state->size < min_nested_state_len) { + error_report("Received nested state size less than min: " + "len=%d, min=%d", + nested_state->size, min_nested_state_len); + return -EINVAL; + } + if (nested_state->size > max_nested_state_len) { + error_report("Recieved unsupported nested state size: " + "nested_state->size=%d, max=%d", + nested_state->size, max_nested_state_len); + return -EINVAL; + } + + /* Verify format is valid */ + if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) && + (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) { + error_report("Received invalid nested state format: %d", + nested_state->format); + return -EINVAL; + } + + return 0; +} + +static const VMStateDescription vmstate_kvm_nested_state = { + .name = "cpu/kvm_nested_state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_U16(flags, struct kvm_nested_state), + VMSTATE_U16(format, struct kvm_nested_state), + VMSTATE_U32(size, struct kvm_nested_state), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_vmx_nested_state, + &vmstate_svm_nested_state, + NULL + } +}; + +static const VMStateDescription vmstate_nested_state = { + .name = "cpu/nested_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = nested_state_needed, + .post_load = nested_state_post_load, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU, + vmstate_kvm_nested_state, + struct kvm_nested_state), + VMSTATE_END_OF_LIST() + } +}; + +#endif + static bool mcg_ext_ctl_needed(void *opaque) { X86CPU *cpu = opaque; @@ -1112,6 +1307,9 @@ VMStateDescription vmstate_x86_cpu = { &vmstate_svm_npt, #ifndef TARGET_X86_64 &vmstate_efer32, +#endif +#ifdef CONFIG_KVM + &vmstate_nested_state, #endif NULL } -- cgit 1.4.1