Diffstat (limited to 'target')
-rw-r--r--   target/arm/kvm.c                   |   5
-rw-r--r--   target/i386/confidential-guest.c   |  33
-rw-r--r--   target/i386/confidential-guest.h   |  59
-rw-r--r--   target/i386/cpu.c                  | 175
-rw-r--r--   target/i386/cpu.h                  |   8
-rw-r--r--   target/i386/host-cpu.c             |  11
-rw-r--r--   target/i386/kvm/kvm-cpu.c          |  50
-rw-r--r--   target/i386/kvm/kvm.c              | 478
-rw-r--r--   target/i386/kvm/kvm_i386.h         |   2
-rw-r--r--   target/i386/kvm/meson.build        |   2
-rw-r--r--   target/i386/kvm/sev-stub.c         |  21
-rw-r--r--   target/i386/meson.build            |   2
-rw-r--r--   target/i386/sev.c                  | 178
-rw-r--r--   target/i386/sev.h                  |   2
-rw-r--r--   target/i386/tcg/translate.c        |  14
-rw-r--r--   target/loongarch/kvm/kvm.c         |   5
-rw-r--r--   target/mips/kvm.c                  |   5
-rw-r--r--   target/ppc/kvm.c                   |   5
-rw-r--r--   target/riscv/kvm/kvm-cpu.c         |   5
-rw-r--r--   target/s390x/kvm/kvm.c             |   5
-rw-r--r--   target/s390x/kvm/pv.c              |  10
-rw-r--r--   target/s390x/kvm/pv.h              |  14
22 files changed, 708 insertions, 381 deletions
diff --git a/target/arm/kvm.c b/target/arm/kvm.c index ab85d628a8..21ebbf3b8f 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return (data - 32) & 0xffff; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c new file mode 100644 index 0000000000..b3727845ad --- /dev/null +++ b/target/i386/confidential-guest.c @@ -0,0 +1,33 @@ +/* + * QEMU Confidential Guest support + * + * Copyright (C) 2024 Red Hat, Inc. + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "confidential-guest.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, + x86_confidential_guest, + X86_CONFIDENTIAL_GUEST, + CONFIDENTIAL_GUEST_SUPPORT) + +static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) +{ +} + +static void x86_confidential_guest_init(Object *obj) +{ +} + +static void x86_confidential_guest_finalize(Object *obj) +{ +} diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h new file mode 100644 index 0000000000..532e172a60 --- /dev/null +++ b/target/i386/confidential-guest.h @@ -0,0 +1,59 @@ +/* + * x86-specific confidential guest methods. + * + * Copyright (c) 2024 Red Hat Inc. + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef TARGET_I386_CG_H +#define TARGET_I386_CG_H + +#include "qom/object.h" + +#include "exec/confidential-guest-support.h" + +#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" + +OBJECT_DECLARE_TYPE(X86ConfidentialGuest, + X86ConfidentialGuestClass, + X86_CONFIDENTIAL_GUEST) + +struct X86ConfidentialGuest { + /* <private> */ + ConfidentialGuestSupport parent_obj; +}; + +/** + * X86ConfidentialGuestClass: + * + * Class to be implemented by confidential-guest-support concrete objects + * for the x86 target. + */ +struct X86ConfidentialGuestClass { + /* <private> */ + ConfidentialGuestSupportClass parent; + + /* <public> */ + int (*kvm_type)(X86ConfidentialGuest *cg); +}; + +/** + * x86_confidential_guest_kvm_type: + * + * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler. 
+ */ +static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->kvm_type) { + return klass->kvm_type(cg); + } else { + return 0; + } +} +#endif diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 33760a2ee1..fd6af0d763 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", NULL, "fb-clear", NULL, NULL, NULL, NULL, NULL, NULL, - "pbrsb-no", NULL, "gds-no", NULL, - NULL, NULL, NULL, NULL, + "pbrsb-no", NULL, "gds-no", "rfds-no", + "rfds-clear", NULL, NULL, NULL, }, .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, @@ -3822,6 +3822,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 7, + .note = "TSX, taa-no", + .props = (PropValue[]) { + /* Restore TSX features removed by -v2 above */ + { "hle", "on" }, + { "rtm", "on" }, + { /* end of list */ } + }, + }, { /* end of list */ } } }, @@ -4100,6 +4110,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, }, { + .name = "SierraForest", + .level = 0x23, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 175, + .stepping = 0, + /* + * please keep the ascending order so that we can have a clear view of + * bit position of each feature. + */ + .features[FEAT_1_EDX] = + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + .features[FEAT_1_ECX] = + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_WBNOINVD, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SHA_NI, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | + MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | + MSR_ARCH_CAP_PBRSB_NO, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | 
CPUID_7_1_EAX_CMPCCXADD | + CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, + .features[FEAT_7_1_EDX] = + CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, + .features[FEAT_7_2_EDX] = + CPUID_7_2_EDX_MCDT_NO, + .features[FEAT_VMX_BASIC] = + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = + MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | + MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | + VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = + VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | + VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | + VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | + VMX_SECONDARY_EXEC_XSAVES, + .features[FEAT_VMX_VMFUNC] = + MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (SierraForest)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { /* end of list */ }, + }, + }, + { .name = "Denverton", .level = 21, .vendor = CPUID_VENDOR_INTEL, @@ -6570,6 +6706,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { /* 64 bit processor */ *eax |= (cpu_x86_virtual_addr_width(env) << 8); + *eax |= (cpu->guest_phys_bits << 16); } *ebx = 
env->features[FEAT_8000_0008_EBX]; if (cs->nr_cores * cs->nr_threads > 1) { @@ -7230,7 +7367,6 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) X86CPUClass *xcc = X86_CPU_GET_CLASS(dev); CPUX86State *env = &cpu->env; Error *local_err = NULL; - static bool ht_warned; unsigned requested_lbr_fmt; #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) @@ -7329,6 +7465,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } + if (cpu->guest_phys_bits == -1) { + /* + * If it was not set by the user, or by the accelerator via + * cpu_exec_realizefn, clear. + */ + cpu->guest_phys_bits = 0; + } + if (cpu->ucode_rev == 0) { /* * The default is the same as KVM's. Note that this check @@ -7379,6 +7523,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) if (cpu->phys_bits == 0) { cpu->phys_bits = TCG_PHYS_ADDR_BITS; } + if (cpu->guest_phys_bits && + (cpu->guest_phys_bits > cpu->phys_bits || + cpu->guest_phys_bits < 32)) { + error_setg(errp, "guest-phys-bits should be between 32 and %u " + " (but is %u)", + cpu->phys_bits, cpu->guest_phys_bits); + return; + } } else { /* For 32 bit systems don't use the user set value, but keep * phys_bits consistent with what we tell the guest. @@ -7387,6 +7539,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) error_setg(errp, "phys-bits is not user-configurable in 32 bit"); return; } + if (cpu->guest_phys_bits != 0) { + error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); + return; + } if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { cpu->phys_bits = 36; @@ -7453,13 +7609,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) */ if (IS_AMD_CPU(env) && !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && - cs->nr_threads > 1 && !ht_warned) { - warn_report("This family of AMD CPU doesn't support " - "hyperthreading(%d)", - cs->nr_threads); - error_printf("Please configure -smp options properly" - " or try enabling topoext feature.\n"); - ht_warned = true; + cs->nr_threads > 1) { + warn_report_once("This family of AMD CPU doesn't support " + "hyperthreading(%d). Please configure -smp " + "options properly or try enabling topoext " + "feature.", cs->nr_threads); } #ifndef CONFIG_USER_ONLY @@ -7887,6 +8041,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6b05738079..6112e27bfd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2027,6 +2027,14 @@ struct ArchCPU { /* Number of physical address bits supported */ uint32_t phys_bits; + /* + * Number of guest physical address bits available. Usually this is + * identical to host physical address bits. With NPT or EPT 4-level + * paging, guest physical address space might be restricted to 48 bits + * even if the host cpu supports more physical address bits. 
+ */ + uint32_t guest_phys_bits; + /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index 92ecb7254b..280e427c01 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -55,18 +55,15 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) { uint32_t host_phys_bits = host_cpu_phys_bits(); uint32_t phys_bits = cpu->phys_bits; - static bool warned; /* * Print a warning if the user set it to a value that's not the * host value. */ - if (phys_bits != host_phys_bits && phys_bits != 0 && - !warned) { - warn_report("Host physical bits (%u)" - " does not match phys-bits property (%u)", - host_phys_bits, phys_bits); - warned = true; + if (phys_bits != host_phys_bits && phys_bits != 0) { + warn_report_once("Host physical bits (%u)" + " does not match phys-bits property (%u)", + host_phys_bits, phys_bits); } if (cpu->host_phys_bits) { diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c index 9c791b7b05..f76972e47e 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -18,10 +18,32 @@ #include "kvm_i386.h" #include "hw/core/accel-cpu.h" +static void kvm_set_guest_phys_bits(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + uint32_t eax, guest_phys_bits; + + eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); + guest_phys_bits = (eax >> 16) & 0xff; + if (!guest_phys_bits) { + return; + } + cpu->guest_phys_bits = guest_phys_bits; + if (cpu->guest_phys_bits > cpu->phys_bits) { + cpu->guest_phys_bits = cpu->phys_bits; + } + + if (cpu->host_phys_bits && cpu->host_phys_bits_limit && + cpu->guest_phys_bits > cpu->host_phys_bits_limit) { + cpu->guest_phys_bits = cpu->host_phys_bits_limit; + } +} + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + bool ret; /* * The realize order is important, since x86_cpu_realize() checks if @@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) * * realize order: * - * x86_cpu_realize(): - * -> x86_cpu_expand_features() - * -> cpu_exec_realizefn(): - * -> accel_cpu_common_realize() - * kvm_cpu_realizefn() -> host_cpu_realizefn() - * -> cpu_common_realizefn() - * -> check/update ucode_rev, phys_bits, mwait + * x86_cpu_realizefn(): + * x86_cpu_expand_features() + * cpu_exec_realizefn(): + * accel_cpu_common_realize() + * kvm_cpu_realizefn() + * host_cpu_realizefn() + * kvm_set_guest_phys_bits() + * check/update ucode_rev, phys_bits, guest_phys_bits, mwait + * cpu_common_realizefn() (via xcc->parent_realize) */ if (cpu->max_features) { if (enable_cpu_pm && kvm_has_waitpkg()) { @@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) MSR_IA32_UCODE_REV); } } - return host_cpu_realizefn(cs, errp); + ret = host_cpu_realizefn(cs, errp); + if (!ret) { + return ret; + } + + if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && + cpu->guest_phys_bits == -1) { + kvm_set_guest_phys_bits(cs); + } + + return true; } static bool lmce_supported(void) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index e68cbe9293..c5943605ee 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -31,6 +31,7 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "kvm_i386.h" +#include "../confidential-guest.h" #include "sev.h" #include "xen-emu.h" #include "hyperv.h" @@ -161,6 +162,51 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; static RateLimit 
bus_lock_ratelimit_ctrl; static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); +static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", +}; + +bool kvm_is_vm_type_supported(int type) +{ + uint32_t machine_types; + + /* + * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM + * is always supported + */ + if (type == KVM_X86_DEFAULT_VM) { + return true; + } + + machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), + KVM_CAP_VM_TYPES); + return !!(machine_types & BIT(type)); +} + +int kvm_get_vm_type(MachineState *ms) +{ + int kvm_type = KVM_X86_DEFAULT_VM; + + if (ms->cgs) { + if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { + error_report("configuration type %s not supported for x86 guests", + object_get_typename(OBJECT(ms->cgs))); + exit(1); + } + kvm_type = x86_confidential_guest_kvm_type( + X86_CONFIDENTIAL_GUEST(ms->cgs)); + } + + if (!kvm_is_vm_type_supported(kvm_type)) { + error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); + exit(1); + } + + return kvm_type; +} + bool kvm_has_smm(void) { return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); @@ -1706,195 +1752,22 @@ static void kvm_init_nested_state(CPUX86State *env) } } -int kvm_arch_init_vcpu(CPUState *cs) +static uint32_t kvm_x86_build_cpuid(CPUX86State *env, + struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { - struct { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; - } cpuid_data; - /* - * The kernel defines these structs with padding fields so there - * should be no extra padding in our cpuid_data struct. - */ - QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != - sizeof(struct kvm_cpuid2) + - sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); - - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - uint32_t limit, i, j, cpuid_i; + uint32_t limit, i, j; uint32_t unused; struct kvm_cpuid_entry2 *c; - uint32_t signature[3]; - int kvm_base = KVM_CPUID_SIGNATURE; - int max_nested_state_len; - int r; - Error *local_err = NULL; - - memset(&cpuid_data, 0, sizeof(cpuid_data)); - - cpuid_i = 0; - - has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); - - r = kvm_arch_set_tsc_khz(cs); - if (r < 0) { - return r; - } - - /* vcpu's TSC frequency is either specified by user, or following - * the value used by KVM if the former is not present. In the - * latter case, we query it from KVM and record in env->tsc_khz, - * so that vcpu's TSC frequency can be migrated later via this field. - */ - if (!env->tsc_khz) { - r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? - kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : - -ENOTSUP; - if (r > 0) { - env->tsc_khz = r; - } - } - - env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; - - /* - * kvm_hyperv_expand_features() is called here for the second time in case - * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle - * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to - * check which Hyper-V enlightenments are supported and which are not, we - * can still proceed and check/expand Hyper-V enlightenments here so legacy - * behavior is preserved. 
- */ - if (!kvm_hyperv_expand_features(cpu, &local_err)) { - error_report_err(local_err); - return -ENOSYS; - } - - if (hyperv_enabled(cpu)) { - r = hyperv_init_vcpu(cpu); - if (r) { - return r; - } - - cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); - kvm_base = KVM_CPUID_SIGNATURE_NEXT; - has_msr_hv_hypercall = true; - } - - if (cs->kvm_state->xen_version) { -#ifdef CONFIG_XEN_EMU - struct kvm_cpuid_entry2 *xen_max_leaf; - - memcpy(signature, "XenVMMXenVMM", 12); - - xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_SIGNATURE; - c->eax = kvm_base + XEN_CPUID_TIME; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_VENDOR; - c->eax = cs->kvm_state->xen_version; - c->ebx = 0; - c->ecx = 0; - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_HVM_MSR; - /* Number of hypercall-transfer pages */ - c->eax = 1; - /* Hypercall MSR base address */ - if (hyperv_enabled(cpu)) { - c->ebx = XEN_HYPERCALL_MSR_HYPERV; - kvm_xen_init(cs->kvm_state, c->ebx); - } else { - c->ebx = XEN_HYPERCALL_MSR; - } - c->ecx = 0; - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_TIME; - c->eax = ((!!tsc_is_stable_and_known(env) << 1) | - (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); - /* default=0 (emulate if necessary) */ - c->ebx = 0; - /* guest tsc frequency */ - c->ecx = env->user_tsc_khz; - /* guest tsc incarnation (migration count) */ - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_HVM; - xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; - if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { - c->function = kvm_base + XEN_CPUID_HVM; - - if (cpu->xen_vapic) { - c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; - c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; - } - - c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; - - if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { - c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; - c->ebx = cs->cpu_index; - } - - if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { - c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; - } - } - - r = kvm_xen_init_vcpu(cs); - if (r) { - return r; - } - - kvm_base += 0x100; -#else /* CONFIG_XEN_EMU */ - /* This should never happen as kvm_arch_init() would have died first. 
*/ - fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); - abort(); -#endif - } else if (cpu->expose_kvm) { - memcpy(signature, "KVMKVMKVM\0\0\0", 12); - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = KVM_CPUID_FEATURES | kvm_base; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_FEATURES | kvm_base; - c->eax = env->features[FEAT_KVM]; - c->edx = env->features[FEAT_KVM_HINTS]; - } cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); - if (cpu->kvm_pv_enforce_cpuid) { - r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); - if (r < 0) { - fprintf(stderr, - "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", - strerror(-r)); - abort(); - } - } - for (i = 0; i <= limit; i++) { + j = 0; if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported level value: 0x%x\n", limit); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; - + c = &entries[cpuid_i++]; switch (i) { case 2: { /* Keep reading function 2 till all the input is received */ @@ -1908,11 +1781,9 @@ int kvm_arch_init_vcpu(CPUState *cs) for (j = 1; j < times; ++j) { if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:2):eax & 0xf = 0x%x\n", times); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; c->function = i; c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); @@ -1951,11 +1822,9 @@ int kvm_arch_init_vcpu(CPUState *cs) continue; } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; } break; case 0x12: @@ -1970,11 +1839,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x12,ecx:0x%x)\n", j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; } break; case 0x7: @@ -1991,11 +1858,9 @@ int kvm_arch_init_vcpu(CPUState *cs) for (j = 1; j <= times; ++j) { if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; c->function = i; c->index = j; c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; @@ -2048,11 +1913,11 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); for (i = 0x80000000; i <= limit; i++) { + j = 0; if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; switch (i) { case 0x8000001d: @@ -2067,11 +1932,9 @@ int kvm_arch_init_vcpu(CPUState *cs) break; } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; } break; default: @@ -2094,11 +1957,11 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); for (i = 0xC0000000; i <= limit; i++) { + j = 0; if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, 
"unsupported xlevel2 value: 0x%x\n", limit); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; c->function = i; c->flags = 0; @@ -2106,6 +1969,194 @@ int kvm_arch_init_vcpu(CPUState *cs) } } + return cpuid_i; + +full: + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); +} + +int kvm_arch_init_vcpu(CPUState *cs) +{ + struct { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; + } cpuid_data; + /* + * The kernel defines these structs with padding fields so there + * should be no extra padding in our cpuid_data struct. + */ + QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != + sizeof(struct kvm_cpuid2) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + uint32_t cpuid_i; + struct kvm_cpuid_entry2 *c; + uint32_t signature[3]; + int kvm_base = KVM_CPUID_SIGNATURE; + int max_nested_state_len; + int r; + Error *local_err = NULL; + + memset(&cpuid_data, 0, sizeof(cpuid_data)); + + cpuid_i = 0; + + has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; + } + + /* vcpu's TSC frequency is either specified by user, or following + * the value used by KVM if the former is not present. In the + * latter case, we query it from KVM and record in env->tsc_khz, + * so that vcpu's TSC frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (r > 0) { + env->tsc_khz = r; + } + } + + env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; + + /* + * kvm_hyperv_expand_features() is called here for the second time in case + * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle + * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to + * check which Hyper-V enlightenments are supported and which are not, we + * can still proceed and check/expand Hyper-V enlightenments here so legacy + * behavior is preserved. 
+ */ + if (!kvm_hyperv_expand_features(cpu, &local_err)) { + error_report_err(local_err); + return -ENOSYS; + } + + if (hyperv_enabled(cpu)) { + r = hyperv_init_vcpu(cpu); + if (r) { + return r; + } + + cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); + kvm_base = KVM_CPUID_SIGNATURE_NEXT; + has_msr_hv_hypercall = true; + } + + if (cs->kvm_state->xen_version) { +#ifdef CONFIG_XEN_EMU + struct kvm_cpuid_entry2 *xen_max_leaf; + + memcpy(signature, "XenVMMXenVMM", 12); + + xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_SIGNATURE; + c->eax = kvm_base + XEN_CPUID_TIME; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_VENDOR; + c->eax = cs->kvm_state->xen_version; + c->ebx = 0; + c->ecx = 0; + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_HVM_MSR; + /* Number of hypercall-transfer pages */ + c->eax = 1; + /* Hypercall MSR base address */ + if (hyperv_enabled(cpu)) { + c->ebx = XEN_HYPERCALL_MSR_HYPERV; + kvm_xen_init(cs->kvm_state, c->ebx); + } else { + c->ebx = XEN_HYPERCALL_MSR; + } + c->ecx = 0; + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_TIME; + c->eax = ((!!tsc_is_stable_and_known(env) << 1) | + (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); + /* default=0 (emulate if necessary) */ + c->ebx = 0; + /* guest tsc frequency */ + c->ecx = env->user_tsc_khz; + /* guest tsc incarnation (migration count) */ + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_HVM; + xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { + c->function = kvm_base + XEN_CPUID_HVM; + + if (cpu->xen_vapic) { + c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; + c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; + } + + c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; + + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { + c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; + c->ebx = cs->cpu_index; + } + + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { + c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; + } + } + + r = kvm_xen_init_vcpu(cs); + if (r) { + return r; + } + + kvm_base += 0x100; +#else /* CONFIG_XEN_EMU */ + /* This should never happen as kvm_arch_init() would have died first. */ + fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); + abort(); +#endif + } else if (cpu->expose_kvm) { + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | kvm_base; + c->eax = KVM_CPUID_FEATURES | kvm_base; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_FEATURES | kvm_base; + c->eax = env->features[FEAT_KVM]; + c->edx = env->features[FEAT_KVM_HINTS]; + } + + if (cpu->kvm_pv_enforce_cpuid) { + r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); + if (r < 0) { + fprintf(stderr, + "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", + strerror(-r)); + abort(); + } + } + + cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; if (((env->cpuid_version >> 8)&0xF) >= 6 @@ -2538,10 +2589,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) * mechanisms are supported in future (e.g. TDX), they'll need * their own initialization either here or elsewhere. 
*/ - ret = sev_kvm_init(ms->cgs, &local_err); - if (ret < 0) { - error_report_err(local_err); - return ret; + if (ms->cgs) { + ret = confidential_guest_kvm_init(ms->cgs, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } } has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); @@ -5612,11 +5665,6 @@ bool kvm_has_waitpkg(void) return has_msr_umwait; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return !sev_es_enabled(); -} - #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 30fedcffea..6b44844d95 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); bool kvm_enable_sgx_provisioning(KVMState *s); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +int kvm_get_vm_type(MachineState *ms); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_after_reset_vcpu(X86CPU *cpu); void kvm_arch_do_init_vcpu(X86CPU *cs); @@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); void kvm_synchronize_all_tsc(void); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 84d9143e60..e7850981e6 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -7,8 +7,6 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) -i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) - i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c deleted file mode 100644 index 1be5341e8a..0000000000 --- a/target/i386/kvm/sev-stub.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh <brijesh.singh@amd.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "sev.h" - -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - /* If we get here, cgs must be some non-SEV thing */ - return 0; -} diff --git a/target/i386/meson.build b/target/i386/meson.build index 7c74bfa859..8abce725f8 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -6,7 +6,7 @@ i386_ss.add(files( 'xsave_helper.c', 'cpu-dump.c', )) -i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) diff --git a/target/i386/sev.c b/target/i386/sev.c index 72930ff0dc..d30b68c11e 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -26,6 +26,7 @@ #include "qemu/error-report.h" #include "crypto/hash.h" #include "sysemu/kvm.h" +#include "kvm/kvm_i386.h" #include "sev.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" @@ -35,7 +36,7 @@ #include "monitor/monitor.h" #include "monitor/hmp-target.h" #include "qapi/qapi-commands-misc-target.h" -#include "exec/confidential-guest-support.h" +#include "confidential-guest.h" #include "hw/i386/pc.h" #include "exec/address-spaces.h" @@ -54,7 +55,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) * -machine ...,memory-encryption=sev0 */ struct SevGuestState { - ConfidentialGuestSupport parent_obj; + X86ConfidentialGuest parent_obj; + + int kvm_type; /* configuration parameters */ char *sev_device; @@ -64,6 +67,7 @@ struct SevGuestState { uint32_t cbitpos; uint32_t reduced_phys_bits; bool kernel_hashes; + bool legacy_vm_type; /* runtime state */ uint32_t handle; @@ -353,63 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) sev->kernel_hashes = value; } -static void -sev_guest_class_init(ObjectClass *oc, void *data) +static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) { - object_class_property_add_str(oc, "sev-device", - sev_guest_get_sev_device, - sev_guest_set_sev_device); - object_class_property_set_description(oc, "sev-device", - "SEV device to use"); - object_class_property_add_str(oc, "dh-cert-file", - sev_guest_get_dh_cert_file, - sev_guest_set_dh_cert_file); - object_class_property_set_description(oc, "dh-cert-file", - "guest owners DH certificate (encoded with base64)"); - object_class_property_add_str(oc, "session-file", - sev_guest_get_session_file, - sev_guest_set_session_file); - object_class_property_set_description(oc, "session-file", - "guest owners session parameters (encoded with base64)"); - object_class_property_add_bool(oc, "kernel-hashes", - sev_guest_get_kernel_hashes, - sev_guest_set_kernel_hashes); - object_class_property_set_description(oc, "kernel-hashes", - "add kernel hashes to guest firmware for measured Linux boot"); + return SEV_GUEST(obj)->legacy_vm_type; } -static void -sev_guest_instance_init(Object *obj) +static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) { - SevGuestState *sev = SEV_GUEST(obj); - - sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); - sev->policy = DEFAULT_GUEST_POLICY; - object_property_add_uint32_ptr(obj, "policy", &sev->policy, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "handle", &sev->handle, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "reduced-phys-bits", - &sev->reduced_phys_bits, - OBJ_PROP_FLAG_READWRITE); + 
SEV_GUEST(obj)->legacy_vm_type = value; } -/* sev guest info */ -static const TypeInfo sev_guest_info = { - .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, - .name = TYPE_SEV_GUEST, - .instance_size = sizeof(SevGuestState), - .instance_finalize = sev_guest_finalize, - .class_init = sev_guest_class_init, - .instance_init = sev_guest_instance_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } -}; - bool sev_enabled(void) { @@ -812,6 +769,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) if (ret) { exit(1); } + kvm_mark_guest_state_protected(); } /* query the measurement blob length */ @@ -906,20 +864,35 @@ sev_vm_state_change(void *opaque, bool running, RunState state) } } -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int sev_kvm_type(X86ConfidentialGuest *cg) +{ + SevGuestState *sev = SEV_GUEST(cg); + int kvm_type; + + if (sev->kvm_type != -1) { + goto out; + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; + } + +out: + return sev->kvm_type; +} + +static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevGuestState *sev - = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + SevGuestState *sev = SEV_GUEST(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; - if (!sev) { - return 0; - } - ret = ram_block_discard_disable(true); if (ret) { error_report("%s: cannot disable RAM discard", __func__); @@ -990,13 +963,19 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) __func__); goto err; } - cmd = KVM_SEV_ES_INIT; - } else { - cmd = KVM_SEV_INIT; } trace_kvm_sev_init(); - ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } + if (ret) { error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -1385,6 +1364,77 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) } static void +sev_guest_class_init(ObjectClass *oc, void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, + sev_guest_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); + object_class_property_set_description(oc, "dh-cert-file", + "guest owners DH certificate (encoded with base64)"); + object_class_property_add_str(oc, "session-file", + sev_guest_get_session_file, + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_guest_get_kernel_hashes, + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); +} + +static void +sev_guest_instance_init(Object *obj) +{ + SevGuestState *sev = SEV_GUEST(obj); + + sev->kvm_type = -1; + + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "policy", &sev->policy, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "handle", &sev->handle, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); +} + +/* sev guest info */ +static const TypeInfo sev_guest_info = { + .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, + .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void sev_register_types(void) { type_register_static(&sev_guest_info); diff --git a/target/i386/sev.h b/target/i386/sev.h index e7499c95b1..9e10d09539 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); void sev_es_set_reset_vector(CPUState *cpu); -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); - #endif diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 76a42c679c..c05d9e5225 100644 --- a/target/i386/tcg/translate.c 
+++ b/target/i386/tcg/translate.c @@ -5846,9 +5846,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, gdt.base)); - if (dflag == MO_16) { - tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); - } + /* + * NB: Despite a confusing description in Intel CPU documentation, + * all 32-bits are written regardless of operand size. + */ gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0); break; @@ -5901,9 +5902,10 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.base)); - if (dflag == MO_16) { - tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); - } + /* + * NB: Despite a confusing description in Intel CPU documentation, + * all 32-bits are written regardless of operand size. + */ gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0); break; diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index d630cc39cb..8224d94333 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { int ret = 0; diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 6c52e59f55..a631ab544f 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine) return -1; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8231feb2d4..63930d4a77 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2956,11 +2956,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 6a6c6cae80..49d2f3ad58 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - static int aia_mode; static const char *kvm_aia_mode_str(uint64_t mode) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 4ce809c5d4..4dcd757cdc 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2622,11 +2622,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) kvm_s390_vcpu_interrupt(cpu, &irq); } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - int kvm_s390_get_zpci_op(void) { return cap_zpci_op; diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c index 7ca7faec73..dde836d21a 100644 --- a/target/s390x/kvm/pv.c +++ b/target/s390x/kvm/pv.c @@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) return s390_pv_check_cpus(errp); } -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { return 0; } + if (!kvm_enabled()) { + error_setg(errp, "Protected Virtualization requires KVM"); + return -1; + } + if (!s390_has_feat(S390_FEAT_UNPACK)) { error_setg(errp, "CPU model does not support Protected Virtualization"); @@ -364,6 
+369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, static void s390_pv_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = s390_pv_kvm_init; } static void s390_pv_guest_init(Object *obj) diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h index 5877d28ff1..4b40817439 100644 --- a/target/s390x/kvm/pv.h +++ b/target/s390x/kvm/pv.h @@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } #endif /* CONFIG_KVM */ -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - if (!cgs) { - return 0; - } - if (kvm_enabled()) { - return s390_pv_kvm_init(cgs, errp); - } - - error_setg(errp, "Protected Virtualization requires KVM"); - return -1; -} - #endif /* HW_S390_PV_H */ |
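
The cpu.c, cpu.h and kvm-cpu.c hunks above wire the new guest-phys-bits property into CPUID leaf 0x80000008: QEMU reports it in EAX bits 23:16, next to the existing physical address width (bits 7:0) and linear address width (bits 15:8), and kvm_set_guest_phys_bits() reads the same field back from KVM's supported CPUID. The standalone C sketch below is not QEMU code; the helper name and the example bit widths are illustrative. It only shows how that leaf encoding packs and decodes.

    /*
     * Sketch of the CPUID 0x80000008 EAX field layout used by the patches:
     * bits 7:0  physical address bits, bits 15:8 linear address bits,
     * bits 23:16 guest physical address bits.
     */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_leaf_80000008_eax(uint8_t phys_bits,
                                           uint8_t virt_bits,
                                           uint8_t guest_phys_bits)
    {
        return (uint32_t)phys_bits |
               ((uint32_t)virt_bits << 8) |
               ((uint32_t)guest_phys_bits << 16);
    }

    int main(void)
    {
        /*
         * Illustrative values: 52-bit host physical, 57-bit linear (LA57),
         * guest physical capped at 48 bits by 4-level EPT/NPT paging.
         */
        uint32_t eax = pack_leaf_80000008_eax(52, 57, 48);

        /* Decode the same way kvm_set_guest_phys_bits() does in the diff. */
        printf("phys=%u virt=%u guest_phys=%u\n",
               (unsigned)(eax & 0xff),
               (unsigned)((eax >> 8) & 0xff),
               (unsigned)((eax >> 16) & 0xff));
        return 0;
    }

The realize-time checks added to x86_cpu_realizefn() operate on this same field: when guest-phys-bits is set explicitly, it must lie between 32 and phys-bits, and on 32-bit builds it is not user-configurable at all.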