diff options
Diffstat (limited to 'include/hw')
| -rw-r--r-- | include/hw/hyperv/dynmem-proto.h | 423 | ||||
| -rw-r--r-- | include/hw/hyperv/hv-balloon.h | 18 | ||||
| -rw-r--r-- | include/hw/mem/memory-device.h | 7 | ||||
| -rw-r--r-- | include/hw/pci/pci.h | 36 | ||||
| -rw-r--r-- | include/hw/pci/pci_bus.h | 2 | ||||
| -rw-r--r-- | include/hw/vfio/vfio-common.h | 16 | ||||
| -rw-r--r-- | include/hw/vfio/vfio.h | 7 | ||||
| -rw-r--r-- | include/hw/virtio/virtio-iommu.h | 7 |
8 files changed, 491 insertions, 25 deletions
diff --git a/include/hw/hyperv/dynmem-proto.h b/include/hw/hyperv/dynmem-proto.h new file mode 100644 index 0000000000..d0f9090ac4 --- /dev/null +++ b/include/hw/hyperv/dynmem-proto.h @@ -0,0 +1,423 @@ +#ifndef HW_HYPERV_DYNMEM_PROTO_H +#define HW_HYPERV_DYNMEM_PROTO_H + +/* + * Hyper-V Dynamic Memory Protocol definitions + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * Based on drivers/hv/hv_balloon.c from Linux kernel: + * Copyright (c) 2012, Microsoft Corporation. + * + * Author: K. Y. Srinivasan <kys@microsoft.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +/* + * Protocol versions. The low word is the minor version, the high word the major + * version. + * + * History: + * Initial version 1.0 + * Changed to 0.1 on 2009/03/25 + * Changes to 0.2 on 2009/05/14 + * Changes to 0.3 on 2009/12/03 + * Changed to 1.0 on 2011/04/05 + * Changed to 2.0 on 2019/12/10 + */ + +#define DYNMEM_MAKE_VERSION(Major, Minor) ((uint32_t)(((Major) << 16) | (Minor))) +#define DYNMEM_MAJOR_VERSION(Version) ((uint32_t)(Version) >> 16) +#define DYNMEM_MINOR_VERSION(Version) ((uint32_t)(Version) & 0xff) + +enum { + DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), + DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), + DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0), + + DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, + DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, + DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3, + + DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 +}; + + + +/* + * Message Types + */ + +enum dm_message_type { + /* + * Version 0.3 + */ + DM_ERROR = 0, + DM_VERSION_REQUEST = 1, + DM_VERSION_RESPONSE = 2, + DM_CAPABILITIES_REPORT = 3, + DM_CAPABILITIES_RESPONSE = 4, + DM_STATUS_REPORT = 5, + DM_BALLOON_REQUEST = 6, + DM_BALLOON_RESPONSE = 7, + DM_UNBALLOON_REQUEST = 8, + DM_UNBALLOON_RESPONSE = 9, + DM_MEM_HOT_ADD_REQUEST = 10, + DM_MEM_HOT_ADD_RESPONSE = 11, + DM_VERSION_03_MAX = 11, + /* + * Version 1.0. + */ + DM_INFO_MESSAGE = 12, + DM_VERSION_1_MAX = 12, + + /* + * Version 2.0 + */ + DM_MEM_HOT_REMOVE_REQUEST = 13, + DM_MEM_HOT_REMOVE_RESPONSE = 14 +}; + + +/* + * Structures defining the dynamic memory management + * protocol. + */ + +union dm_version { + struct { + uint16_t minor_version; + uint16_t major_version; + }; + uint32_t version; +} QEMU_PACKED; + + +union dm_caps { + struct { + uint64_t balloon:1; + uint64_t hot_add:1; + /* + * To support guests that may have alignment + * limitations on hot-add, the guest can specify + * its alignment requirements; a value of n + * represents an alignment of 2^n in mega bytes. + */ + uint64_t hot_add_alignment:4; + uint64_t hot_remove:1; + uint64_t reservedz:57; + } cap_bits; + uint64_t caps; +} QEMU_PACKED; + +union dm_mem_page_range { + struct { + /* + * The PFN number of the first page in the range. + * 40 bits is the architectural limit of a PFN + * number for AMD64. + */ + uint64_t start_page:40; + /* + * The number of pages in the range. + */ + uint64_t page_cnt:24; + } finfo; + uint64_t page_range; +} QEMU_PACKED; + + + +/* + * The header for all dynamic memory messages: + * + * type: Type of the message. + * size: Size of the message in bytes; including the header. + * trans_id: The guest is responsible for manufacturing this ID. + */ + +struct dm_header { + uint16_t type; + uint16_t size; + uint32_t trans_id; +} QEMU_PACKED; + +/* + * A generic message format for dynamic memory. + * Specific message formats are defined later in the file. + */ + +struct dm_message { + struct dm_header hdr; + uint8_t data[]; /* enclosed message */ +} QEMU_PACKED; + + +/* + * Specific message types supporting the dynamic memory protocol. + */ + +/* + * Version negotiation message. Sent from the guest to the host. + * The guest is free to try different versions until the host + * accepts the version. + * + * dm_version: The protocol version requested. + * is_last_attempt: If TRUE, this is the last version guest will request. + * reservedz: Reserved field, set to zero. + */ + +struct dm_version_request { + struct dm_header hdr; + union dm_version version; + uint32_t is_last_attempt:1; + uint32_t reservedz:31; +} QEMU_PACKED; + +/* + * Version response message; Host to Guest and indicates + * if the host has accepted the version sent by the guest. + * + * is_accepted: If TRUE, host has accepted the version and the guest + * should proceed to the next stage of the protocol. FALSE indicates that + * guest should re-try with a different version. + * + * reservedz: Reserved field, set to zero. + */ + +struct dm_version_response { + struct dm_header hdr; + uint64_t is_accepted:1; + uint64_t reservedz:63; +} QEMU_PACKED; + +/* + * Message reporting capabilities. This is sent from the guest to the + * host. + */ + +struct dm_capabilities { + struct dm_header hdr; + union dm_caps caps; + uint64_t min_page_cnt; + uint64_t max_page_number; +} QEMU_PACKED; + +/* + * Response to the capabilities message. This is sent from the host to the + * guest. This message notifies if the host has accepted the guest's + * capabilities. If the host has not accepted, the guest must shutdown + * the service. + * + * is_accepted: Indicates if the host has accepted guest's capabilities. + * reservedz: Must be 0. + */ + +struct dm_capabilities_resp_msg { + struct dm_header hdr; + uint64_t is_accepted:1; + uint64_t hot_remove:1; + uint64_t suppress_pressure_reports:1; + uint64_t reservedz:61; +} QEMU_PACKED; + +/* + * This message is used to report memory pressure from the guest. + * This message is not part of any transaction and there is no + * response to this message. + * + * num_avail: Available memory in pages. + * num_committed: Committed memory in pages. + * page_file_size: The accumulated size of all page files + * in the system in pages. + * zero_free: The nunber of zero and free pages. + * page_file_writes: The writes to the page file in pages. + * io_diff: An indicator of file cache efficiency or page file activity, + * calculated as File Cache Page Fault Count - Page Read Count. + * This value is in pages. + * + * Some of these metrics are Windows specific and fortunately + * the algorithm on the host side that computes the guest memory + * pressure only uses num_committed value. + */ + +struct dm_status { + struct dm_header hdr; + uint64_t num_avail; + uint64_t num_committed; + uint64_t page_file_size; + uint64_t zero_free; + uint32_t page_file_writes; + uint32_t io_diff; +} QEMU_PACKED; + + +/* + * Message to ask the guest to allocate memory - balloon up message. + * This message is sent from the host to the guest. The guest may not be + * able to allocate as much memory as requested. + * + * num_pages: number of pages to allocate. + */ + +struct dm_balloon { + struct dm_header hdr; + uint32_t num_pages; + uint32_t reservedz; +} QEMU_PACKED; + + +/* + * Balloon response message; this message is sent from the guest + * to the host in response to the balloon message. + * + * reservedz: Reserved; must be set to zero. + * more_pages: If FALSE, this is the last message of the transaction. + * if TRUE there will atleast one more message from the guest. + * + * range_count: The number of ranges in the range array. + * + * range_array: An array of page ranges returned to the host. + * + */ + +struct dm_balloon_response { + struct dm_header hdr; + uint32_t reservedz; + uint32_t more_pages:1; + uint32_t range_count:31; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +/* + * Un-balloon message; this message is sent from the host + * to the guest to give guest more memory. + * + * more_pages: If FALSE, this is the last message of the transaction. + * if TRUE there will atleast one more message from the guest. + * + * reservedz: Reserved; must be set to zero. + * + * range_count: The number of ranges in the range array. + * + * range_array: An array of page ranges returned to the host. + * + */ + +struct dm_unballoon_request { + struct dm_header hdr; + uint32_t more_pages:1; + uint32_t reservedz:31; + uint32_t range_count; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +/* + * Un-balloon response message; this message is sent from the guest + * to the host in response to an unballoon request. + * + */ + +struct dm_unballoon_response { + struct dm_header hdr; +} QEMU_PACKED; + + +/* + * Hot add request message. Message sent from the host to the guest. + * + * mem_range: Memory range to hot add. + * + */ + +struct dm_hot_add { + struct dm_header hdr; + union dm_mem_page_range range; +} QEMU_PACKED; + +/* + * Hot add response message. + * This message is sent by the guest to report the status of a hot add request. + * If page_count is less than the requested page count, then the host should + * assume all further hot add requests will fail, since this indicates that + * the guest has hit an upper physical memory barrier. + * + * Hot adds may also fail due to low resources; in this case, the guest must + * not complete this message until the hot add can succeed, and the host must + * not send a new hot add request until the response is sent. + * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS + * times it fails the request. + * + * + * page_count: number of pages that were successfully hot added. + * + * result: result of the operation 1: success, 0: failure. + * + */ + +struct dm_hot_add_response { + struct dm_header hdr; + uint32_t page_count; + uint32_t result; +} QEMU_PACKED; + +struct dm_hot_remove { + struct dm_header hdr; + uint32_t virtual_node; + uint32_t page_count; + uint32_t qos_flags; + uint32_t reservedZ; +} QEMU_PACKED; + +struct dm_hot_remove_response { + struct dm_header hdr; + uint32_t result; + uint32_t range_count; + uint64_t more_pages:1; + uint64_t reservedz:63; + union dm_mem_page_range range_array[]; +} QEMU_PACKED; + +#define DM_REMOVE_QOS_LARGE (1 << 0) +#define DM_REMOVE_QOS_LOCAL (1 << 1) +#define DM_REMOVE_QOS_MASK (0x3) + +/* + * Types of information sent from host to the guest. + */ + +enum dm_info_type { + INFO_TYPE_MAX_PAGE_CNT = 0, + MAX_INFO_TYPE +}; + + +/* + * Header for the information message. + */ + +struct dm_info_header { + enum dm_info_type type; + uint32_t data_size; + uint8_t data[]; +} QEMU_PACKED; + +/* + * This message is sent from the host to the guest to pass + * some relevant information (win8 addition). + * + * reserved: no used. + * info_size: size of the information blob. + * info: information blob. + */ + +struct dm_info_msg { + struct dm_header hdr; + uint32_t reserved; + uint32_t info_size; + uint8_t info[]; +}; + +#endif diff --git a/include/hw/hyperv/hv-balloon.h b/include/hw/hyperv/hv-balloon.h new file mode 100644 index 0000000000..c1efe70fc2 --- /dev/null +++ b/include/hw/hyperv/hv-balloon.h @@ -0,0 +1,18 @@ +/* + * QEMU Hyper-V Dynamic Memory Protocol driver + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HV_BALLOON_H +#define HW_HV_BALLOON_H + +#include "qom/object.h" + +#define TYPE_HV_BALLOON "hv-balloon" +OBJECT_DECLARE_SIMPLE_TYPE(HvBalloon, HV_BALLOON) + +#endif diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h index 3354d6c166..a1d62cc551 100644 --- a/include/hw/mem/memory-device.h +++ b/include/hw/mem/memory-device.h @@ -38,6 +38,10 @@ typedef struct MemoryDeviceState MemoryDeviceState; * address in guest physical memory can either be specified explicitly * or get assigned automatically. * + * Some memory device might not own a memory region in certain device + * configurations. Such devices can logically get (un)plugged, however, + * empty memory devices are mostly ignored by the memory device code. + * * Conceptually, memory devices only span one memory region. If multiple * successive memory regions are used, a covering memory region has to * be provided. Scattered memory regions are not supported for single @@ -91,7 +95,8 @@ struct MemoryDeviceClass { uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp); /* - * Return the memory region of the memory device. + * Return the memory region of the memory device. If the device is + * completely empty, returns NULL without an error. * * Called when (un)plugging the memory device, to (un)map the * memory region in guest physical memory, but also to detect the diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index ea5aff118b..fa6313aabc 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -363,10 +363,42 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range); void pci_device_deassert_intx(PCIDevice *dev); -typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int); + +/** + * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers + * of a PCIBus + * + * Allows to modify the behavior of some IOMMU operations of the PCI + * framework for a set of devices on a PCI bus. + */ +typedef struct PCIIOMMUOps { + /** + * @get_address_space: get the address space for a set of devices + * on a PCI bus. + * + * Mandatory callback which returns a pointer to an #AddressSpace + * + * @bus: the #PCIBus being accessed. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number + */ + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); +} PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); + +/** + * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus + * + * Let PCI host bridges define specific operations. + * + * @bus: the #PCIBus being updated. + * @ops: the #PCIIOMMUOps + * @opaque: passed to callbacks of the @ops structure. + */ +void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque); pcibus_t pci_bar_address(PCIDevice *d, int reg, uint8_t type, pcibus_t size); diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index 5653175957..2261312546 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -33,7 +33,7 @@ enum PCIBusFlags { struct PCIBus { BusState qbus; enum PCIBusFlags flags; - PCIIOMMUFunc iommu_fn; + const PCIIOMMUOps *iommu_ops; void *iommu_opaque; uint8_t devfn_min; uint32_t slot_reserved_mask; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 7780b9073a..a4a22accb9 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -99,6 +99,7 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; QLIST_ENTRY(VFIOContainer) next; QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; } VFIOContainer; typedef struct VFIOGuestIOMMU { @@ -206,11 +207,6 @@ typedef struct { hwaddr pages; } VFIOBitmap; -void vfio_host_win_add(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova, - uint64_t iova_pgsizes); -int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova); VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); bool vfio_devices_all_running_and_saving(VFIOContainer *container); @@ -224,11 +220,14 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, hwaddr iova, hwaddr size); +/* SPAPR specific */ int vfio_container_add_section_window(VFIOContainer *container, MemoryRegionSection *section, Error **errp); void vfio_container_del_section_window(VFIOContainer *container, MemoryRegionSection *section); +int vfio_spapr_container_init(VFIOContainer *container, Error **errp); +void vfio_spapr_container_deinit(VFIOContainer *container); void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); @@ -288,13 +287,6 @@ vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); struct vfio_info_cap_header * vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); #endif -extern const MemoryListener vfio_prereg_listener; - -int vfio_spapr_create_window(VFIOContainer *container, - MemoryRegionSection *section, - hwaddr *pgsize); -int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space); bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); void vfio_migration_exit(VFIODevice *vbasedev); diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h deleted file mode 100644 index 86248f5436..0000000000 --- a/include/hw/vfio/vfio.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef HW_VFIO_H -#define HW_VFIO_H - -bool vfio_eeh_as_ok(AddressSpace *as); -int vfio_eeh_as_op(AddressSpace *as, uint32_t op); - -#endif diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index a93fc5383e..781ebaea8f 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -39,6 +39,9 @@ typedef struct IOMMUDevice { AddressSpace as; MemoryRegion root; /* The root container of the device */ MemoryRegion bypass_mr; /* The alias of shared memory MR */ + GList *resv_regions; + GList *host_resv_ranges; + bool probe_done; } IOMMUDevice; typedef struct IOMMUPciBus { @@ -55,8 +58,8 @@ struct VirtIOIOMMU { GHashTable *as_by_busptr; IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; PCIBus *primary_bus; - ReservedRegion *reserved_regions; - uint32_t nb_reserved_regions; + ReservedRegion *prop_resv_regions; + uint32_t nr_prop_resv_regions; GTree *domains; QemuRecMutex mutex; GTree *endpoints; |