Diffstat (limited to 'accel/kvm/kvm-all.c')
| -rw-r--r-- | accel/kvm/kvm-all.c | 274 |
1 file changed, 242 insertions, 32 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 931f74256e..d7281b93f3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -91,6 +91,8 @@ bool kvm_msi_use_devid;
 static bool kvm_has_guest_debug;
 static int kvm_sstep_flags;
 static bool kvm_immediate_exit;
+static uint64_t kvm_supported_memory_attributes;
+static bool kvm_guest_memfd_supported;
 static hwaddr kvm_max_slot_size = ~0;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
@@ -282,34 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
 static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
 {
     KVMState *s = kvm_state;
-    struct kvm_userspace_memory_region mem;
+    struct kvm_userspace_memory_region2 mem;
     int ret;
 
     mem.slot = slot->slot | (kml->as_id << 16);
     mem.guest_phys_addr = slot->start_addr;
     mem.userspace_addr = (unsigned long)slot->ram;
     mem.flags = slot->flags;
+    mem.guest_memfd = slot->guest_memfd;
+    mem.guest_memfd_offset = slot->guest_memfd_offset;
 
     if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
         /* Set the slot size to 0 before setting the slot to the desired
          * value. This is needed based on KVM commit 75d61fbc. */
         mem.memory_size = 0;
-        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+
+        if (kvm_guest_memfd_supported) {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+        } else {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+        }
         if (ret < 0) {
             goto err;
         }
     }
     mem.memory_size = slot->memory_size;
-    ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    if (kvm_guest_memfd_supported) {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+    } else {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    }
     slot->old_flags = mem.flags;
 err:
-    trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
-                              mem.memory_size, mem.userspace_addr, ret);
+    trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
+                              mem.guest_phys_addr, mem.memory_size,
+                              mem.userspace_addr, mem.guest_memfd,
+                              mem.guest_memfd_offset, ret);
     if (ret < 0) {
-        error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
-                     " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
-                     __func__, mem.slot, slot->start_addr,
-                     (uint64_t)mem.memory_size, strerror(errno));
+        if (kvm_guest_memfd_supported) {
+            error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
+                         " start=0x%" PRIx64 ", size=0x%" PRIx64 ","
+                         " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
+                         " guest_memfd_offset=0x%" PRIx64 ": %s",
+                         __func__, mem.slot, slot->start_addr,
+                         (uint64_t)mem.memory_size, mem.flags,
+                         mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
+                         strerror(errno));
+        } else {
+            error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
+                         " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
+                         __func__, mem.slot, slot->start_addr,
+                         (uint64_t)mem.memory_size, strerror(errno));
+        }
     }
     return ret;
 }
@@ -464,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr)
     if (readonly && kvm_readonly_mem_allowed) {
         flags |= KVM_MEM_READONLY;
     }
+    if (memory_region_has_guest_memfd(mr)) {
+        assert(kvm_guest_memfd_supported);
+        flags |= KVM_MEM_GUEST_MEMFD;
+    }
     return flags;
 }
 
@@ -1265,6 +1295,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
     kvm_max_slot_size = max_slot_size;
 }
 
+static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr)
+{
+    struct kvm_memory_attributes attrs;
+    int r;
+
+    assert((attr & kvm_supported_memory_attributes) == attr);
+    attrs.attributes = attr;
+    attrs.address = start;
+    attrs.size = size;
+    attrs.flags = 0;
+
+    r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
+    if (r) {
+        error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") "
+                     "with attr 0x%" PRIx64 " error '%s'",
+                     start, size, attr, strerror(errno));
+    }
+    return r;
+}
+
+int kvm_set_memory_attributes_private(hwaddr start, uint64_t size)
+{
+    return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size)
+{
+    return kvm_set_memory_attributes(start, size, 0);
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static void kvm_set_phys_mem(KVMMemoryListener *kml,
                              MemoryRegionSection *section, bool add)
@@ -1361,6 +1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         mem->ram_start_offset = ram_start_offset;
         mem->ram = ram;
         mem->flags = kvm_mem_flags(mr);
+        mem->guest_memfd = mr->ram_block->guest_memfd;
+        mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
+
         kvm_slot_init_dirty_bitmap(mem);
         err = kvm_set_user_memory_region(kml, mem, true);
         if (err) {
@@ -1368,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
                          strerror(-err));
             abort();
         }
+
+        if (memory_region_has_guest_memfd(mr)) {
+            err = kvm_set_memory_attributes_private(start_addr, slot_size);
+            if (err) {
+                error_report("%s: failed to set memory attribute private: %s",
+                             __func__, strerror(-err));
+                exit(1);
+            }
+        }
+
         start_addr += slot_size;
         ram_start_offset += slot_size;
         ram += slot_size;
@@ -2360,7 +2433,7 @@ static int kvm_init(MachineState *ms)
     s->sigmask_len = 8;
     accel_blocker_init();
 
-#ifdef KVM_CAP_SET_GUEST_DEBUG
+#ifdef TARGET_KVM_HAVE_GUEST_DEBUG
     QTAILQ_INIT(&s->kvm_sw_breakpoints);
 #endif
     QLIST_INIT(&s->kvm_parked_vcpus);
@@ -2386,6 +2459,12 @@ static int kvm_init(MachineState *ms)
         goto err;
     }
 
+    kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
+    kvm_guest_memfd_supported =
+        kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
+        kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
+        (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
     kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
 
     s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
@@ -2544,7 +2623,7 @@ static int kvm_init(MachineState *ms)
     kvm_vm_attributes_allowed =
         (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0);
 
-#ifdef KVM_CAP_SET_GUEST_DEBUG
+#ifdef TARGET_KVM_HAVE_GUEST_DEBUG
     kvm_has_guest_debug =
         (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0);
 #endif
@@ -2553,7 +2632,7 @@ static int kvm_init(MachineState *ms)
     if (kvm_has_guest_debug) {
         kvm_sstep_flags = SSTEP_ENABLE;
 
-#if defined KVM_CAP_SET_GUEST_DEBUG2
+#if defined TARGET_KVM_HAVE_GUEST_DEBUG
         int guest_debug_flags =
             kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG2);
@@ -2696,14 +2775,9 @@ void kvm_flush_coalesced_mmio_buffer(void)
     s->coalesced_flush_in_progress = false;
 }
 
-bool kvm_cpu_check_are_resettable(void)
-{
-    return kvm_arch_cpu_check_are_resettable();
-}
-
 static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 {
-    if (!cpu->vcpu_dirty) {
+    if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
         int ret = kvm_arch_get_registers(cpu);
         if (ret) {
             error_report("Failed to get registers: %s", strerror(-ret));
@@ -2717,7 +2791,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 
 void kvm_cpu_synchronize_state(CPUState *cpu)
 {
-    if (!cpu->vcpu_dirty) {
+    if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
         run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
     }
 }
@@ -2752,7 +2826,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 
 void kvm_cpu_synchronize_post_init(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+    if (!kvm_state->guest_state_protected) {
+        /*
+         * This runs before the machine_init_done notifiers, and is the last
+         * opportunity to synchronize the state of confidential guests.
+         */
+        run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+    }
 }
 
 static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
@@ -2820,6 +2900,94 @@ static void kvm_eat_signals(CPUState *cpu)
     } while (sigismember(&chkset, SIG_IPI));
 }
 
+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
+{
+    MemoryRegionSection section;
+    ram_addr_t offset;
+    MemoryRegion *mr;
+    RAMBlock *rb;
+    void *addr;
+    int ret = -1;
+
+    trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
+
+    if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) ||
+        !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) {
+        return -1;
+    }
+
+    if (!size) {
+        return -1;
+    }
+
+    section = memory_region_find(get_system_memory(), start, size);
+    mr = section.mr;
+    if (!mr) {
+        /*
+         * Ignore converting non-assigned region to shared.
+         *
+         * TDX requires vMMIO region to be shared to inject #VE to guest.
+         * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region,
+         * and vIO-APIC 0xFEC00000 4K page.
+         * OVMF assigns 32bit PCI MMIO region to
+         * [top of low memory: typically 2GB=0xC000000, 0xFC00000)
+         */
+        if (!to_private) {
+            return 0;
+        }
+        return -1;
+    }
+
+    if (!memory_region_has_guest_memfd(mr)) {
+        /*
+         * Because vMMIO region must be shared, guest TD may convert vMMIO
+         * region to shared explicitly. Don't complain such case. See
+         * memory_region_type() for checking if the region is MMIO region.
+         */
+        if (!to_private &&
+            !memory_region_is_ram(mr) &&
+            !memory_region_is_ram_device(mr) &&
+            !memory_region_is_rom(mr) &&
+            !memory_region_is_romd(mr)) {
+            ret = 0;
+        } else {
+            error_report("Convert non guest_memfd backed memory region "
+                         "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+                         start, size, to_private ? "private" : "shared");
+        }
+        goto out_unref;
+    }
+
+    if (to_private) {
+        ret = kvm_set_memory_attributes_private(start, size);
+    } else {
+        ret = kvm_set_memory_attributes_shared(start, size);
+    }
+    if (ret) {
+        goto out_unref;
+    }
+
+    addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
+    rb = qemu_ram_block_from_host(addr, false, &offset);
+
+    if (to_private) {
+        if (rb->page_size != qemu_real_host_page_size()) {
+            /*
+             * shared memory is backed by hugetlb, which is supposed to be
+             * pre-allocated and doesn't need to be discarded
+             */
+            goto out_unref;
+        }
+        ret = ram_block_discard_range(rb, offset, size);
+    } else {
+        ret = ram_block_discard_guest_memfd_range(rb, offset, size);
+    }
+
+out_unref:
+    memory_region_unref(mr);
+    return ret;
+}
+
 int kvm_cpu_exec(CPUState *cpu)
 {
     struct kvm_run *run = cpu->kvm_run;
@@ -2887,18 +3055,20 @@ int kvm_cpu_exec(CPUState *cpu)
                 ret = EXCP_INTERRUPT;
                 break;
             }
-            fprintf(stderr, "error: kvm run failed %s\n",
-                    strerror(-run_ret));
+            if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
+                fprintf(stderr, "error: kvm run failed %s\n",
+                        strerror(-run_ret));
 #ifdef TARGET_PPC
-            if (run_ret == -EBUSY) {
-                fprintf(stderr,
-                        "This is probably because your SMT is enabled.\n"
-                        "VCPU can only run on primary threads with all "
-                        "secondary threads offline.\n");
-            }
+                if (run_ret == -EBUSY) {
+                    fprintf(stderr,
+                            "This is probably because your SMT is enabled.\n"
+                            "VCPU can only run on primary threads with all "
+                            "secondary threads offline.\n");
+                }
 #endif
-            ret = -1;
-            break;
+                ret = -1;
+                break;
+            }
         }
 
         trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
@@ -2981,6 +3151,19 @@ int kvm_cpu_exec(CPUState *cpu)
                 break;
             }
             break;
+        case KVM_EXIT_MEMORY_FAULT:
+            trace_kvm_memory_fault(run->memory_fault.gpa,
+                                   run->memory_fault.size,
+                                   run->memory_fault.flags);
+            if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
+                error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
+                             (uint64_t)run->memory_fault.flags);
+                ret = -1;
+                break;
+            }
+            ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
+                                     run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
+            break;
         default:
             ret = kvm_arch_handle_exit(cpu, run);
             break;
@@ -3157,7 +3340,7 @@ bool kvm_arm_supports_user_irq(void)
     return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ);
 }
 
-#ifdef KVM_CAP_SET_GUEST_DEBUG
+#ifdef TARGET_KVM_HAVE_GUEST_DEBUG
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, vaddr pc)
 {
     struct kvm_sw_breakpoint *bp;
@@ -3317,7 +3500,7 @@ void kvm_remove_all_breakpoints(CPUState *cpu)
     }
 }
 
-#endif /* !KVM_CAP_SET_GUEST_DEBUG */
+#endif /* !TARGET_KVM_HAVE_GUEST_DEBUG */
 
 static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
 {
@@ -4099,3 +4282,30 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
         query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
+
+void kvm_mark_guest_state_protected(void)
+{
+    kvm_state->guest_state_protected = true;
+}
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+    int fd;
+    struct kvm_create_guest_memfd guest_memfd = {
+        .size = size,
+        .flags = flags,
+    };
+
+    if (!kvm_guest_memfd_supported) {
+        error_setg(errp, "KVM does not support guest_memfd");
+        return -1;
+    }
+
+    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "Error creating KVM guest_memfd");
+        return -1;
+    }
+
+    return fd;
+}