diff options
Diffstat (limited to 'hw')
| -rw-r--r-- | hw/arm/Kconfig | 3 | ||||
| -rw-r--r-- | hw/arm/virt.c | 5 | ||||
| -rw-r--r-- | hw/core/sysbus-fdt.c | 383 | ||||
| -rw-r--r-- | hw/display/bcm2835_fb.c | 1 | ||||
| -rw-r--r-- | hw/mips/malta.c | 2 | ||||
| -rw-r--r-- | hw/ppc/spapr_pci_vfio.c | 2 | ||||
| -rw-r--r-- | hw/s390x/s390-pci-vfio.c | 14 | ||||
| -rw-r--r-- | hw/vfio-user/container.c | 26 | ||||
| -rw-r--r-- | hw/vfio-user/container.h | 7 | ||||
| -rw-r--r-- | hw/vfio-user/pci.c | 18 | ||||
| -rw-r--r-- | hw/vfio/Kconfig | 16 | ||||
| -rw-r--r-- | hw/vfio/amd-xgbe.c | 61 | ||||
| -rw-r--r-- | hw/vfio/calxeda-xgmac.c | 61 | ||||
| -rw-r--r-- | hw/vfio/container.c | 35 | ||||
| -rw-r--r-- | hw/vfio/cpr-legacy.c | 14 | ||||
| -rw-r--r-- | hw/vfio/cpr.c | 12 | ||||
| -rw-r--r-- | hw/vfio/device.c | 2 | ||||
| -rw-r--r-- | hw/vfio/igd.c | 45 | ||||
| -rw-r--r-- | hw/vfio/iommufd.c | 4 | ||||
| -rw-r--r-- | hw/vfio/listener.c | 20 | ||||
| -rw-r--r-- | hw/vfio/meson.build | 3 | ||||
| -rw-r--r-- | hw/vfio/pci-quirks.c | 48 | ||||
| -rw-r--r-- | hw/vfio/pci.c | 217 | ||||
| -rw-r--r-- | hw/vfio/pci.h | 20 | ||||
| -rw-r--r-- | hw/vfio/platform.c | 716 | ||||
| -rw-r--r-- | hw/vfio/spapr.c | 16 | ||||
| -rw-r--r-- | hw/vfio/trace-events | 11 | ||||
| -rw-r--r-- | hw/vfio/vfio-region.h | 48 |
28 files changed, 340 insertions, 1470 deletions
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 2aa4b5d778..3baa6c6c74 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -5,9 +5,6 @@ config ARM_VIRT depends on TCG || KVM || HVF imply PCI_DEVICES imply TEST_DEVICES - imply VFIO_AMD_XGBE - imply VFIO_PLATFORM - imply VFIO_XGMAC imply TPM_TIS_SYSBUS imply TPM_TIS_I2C imply NVDIMM diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 7b3f9b1cdf..02209fadcf 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -38,8 +38,6 @@ #include "hw/arm/primecell.h" #include "hw/arm/virt.h" #include "hw/block/flash.h" -#include "hw/vfio/vfio-calxeda-xgmac.h" -#include "hw/vfio/vfio-amd-xgbe.h" #include "hw/display/ramfb.h" #include "net/net.h" #include "system/device_tree.h" @@ -3276,10 +3274,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data) * configuration of the particular instance. */ mc->max_cpus = 512; - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC); - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); - machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3); #ifdef CONFIG_TPM diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c index e80776080b..59f1d17de1 100644 --- a/hw/core/sysbus-fdt.c +++ b/hw/core/sysbus-fdt.c @@ -33,10 +33,6 @@ #include "system/tpm.h" #include "hw/arm/smmuv3.h" #include "hw/platform-bus.h" -#include "hw/vfio/vfio-platform.h" -#include "hw/vfio/vfio-calxeda-xgmac.h" -#include "hw/vfio/vfio-amd-xgbe.h" -#include "hw/vfio/vfio-region.h" #include "hw/display/ramfb.h" #include "hw/uefi/var-service-api.h" #include "hw/arm/fdt.h" @@ -67,380 +63,6 @@ typedef struct HostProperty { bool optional; } HostProperty; -#ifdef CONFIG_LINUX - -/** - * copy_properties_from_host - * - * copies properties listed in an array from host device tree to - * guest device tree. If a non optional property is not found, the - * function asserts. An optional property is ignored if not found - * in the host device tree. - * @props: array of HostProperty to copy - * @nb_props: number of properties in the array - * @host_dt: host device tree blob - * @guest_dt: guest device tree blob - * @node_path: host dt node path where the property is supposed to be - found - * @nodename: guest node name the properties should be added to - */ -static void copy_properties_from_host(HostProperty *props, int nb_props, - void *host_fdt, void *guest_fdt, - char *node_path, char *nodename) -{ - int i, prop_len; - const void *r; - Error *err = NULL; - - for (i = 0; i < nb_props; i++) { - r = qemu_fdt_getprop(host_fdt, node_path, - props[i].name, - &prop_len, - &err); - if (r) { - qemu_fdt_setprop(guest_fdt, nodename, - props[i].name, r, prop_len); - } else { - if (props[i].optional && prop_len == -FDT_ERR_NOTFOUND) { - /* optional property does not exist */ - error_free(err); - } else { - error_report_err(err); - } - if (!props[i].optional) { - /* mandatory property not found: bail out */ - exit(1); - } - err = NULL; - } - } -} - -/* clock properties whose values are copied/pasted from host */ -static HostProperty clock_copied_properties[] = { - {"compatible", false}, - {"#clock-cells", false}, - {"clock-frequency", true}, - {"clock-output-names", true}, -}; - -/** - * fdt_build_clock_node - * - * Build a guest clock node, used as a dependency from a passthrough'ed - * device. Most information are retrieved from the host clock node. - * Also check the host clock is a fixed one. - * - * @host_fdt: host device tree blob from which info are retrieved - * @guest_fdt: guest device tree blob where the clock node is added - * @host_phandle: phandle of the clock in host device tree - * @guest_phandle: phandle to assign to the guest node - */ -static void fdt_build_clock_node(void *host_fdt, void *guest_fdt, - uint32_t host_phandle, - uint32_t guest_phandle) -{ - char *node_path = NULL; - char *nodename; - const void *r; - int ret, node_offset, prop_len, path_len = 16; - - node_offset = fdt_node_offset_by_phandle(host_fdt, host_phandle); - if (node_offset <= 0) { - error_report("not able to locate clock handle %d in host device tree", - host_phandle); - exit(1); - } - node_path = g_malloc(path_len); - while ((ret = fdt_get_path(host_fdt, node_offset, node_path, path_len)) - == -FDT_ERR_NOSPACE) { - path_len += 16; - node_path = g_realloc(node_path, path_len); - } - if (ret < 0) { - error_report("not able to retrieve node path for clock handle %d", - host_phandle); - exit(1); - } - - r = qemu_fdt_getprop(host_fdt, node_path, "compatible", &prop_len, - &error_fatal); - if (strcmp(r, "fixed-clock")) { - error_report("clock handle %d is not a fixed clock", host_phandle); - exit(1); - } - - nodename = strrchr(node_path, '/'); - qemu_fdt_add_subnode(guest_fdt, nodename); - - copy_properties_from_host(clock_copied_properties, - ARRAY_SIZE(clock_copied_properties), - host_fdt, guest_fdt, - node_path, nodename); - - qemu_fdt_setprop_cell(guest_fdt, nodename, "phandle", guest_phandle); - - g_free(node_path); -} - -/** - * sysfs_to_dt_name: convert the name found in sysfs into the node name - * for instance e0900000.xgmac is converted into xgmac@e0900000 - * @sysfs_name: directory name in sysfs - * - * returns the device tree name upon success or NULL in case the sysfs name - * does not match the expected format - */ -static char *sysfs_to_dt_name(const char *sysfs_name) -{ - gchar **substrings = g_strsplit(sysfs_name, ".", 2); - char *dt_name = NULL; - - if (!substrings || !substrings[0] || !substrings[1]) { - goto out; - } - dt_name = g_strdup_printf("%s@%s", substrings[1], substrings[0]); -out: - g_strfreev(substrings); - return dt_name; -} - -/* Device Specific Code */ - -/** - * add_calxeda_midway_xgmac_fdt_node - * - * Generates a simple node with following properties: - * compatible string, regs, interrupts, dma-coherent - */ -static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque) -{ - PlatformBusFDTData *data = opaque; - PlatformBusDevice *pbus = data->pbus; - void *fdt = data->fdt; - const char *parent_node = data->pbus_node_name; - int compat_str_len, i; - char *nodename; - uint32_t *irq_attr, *reg_attr; - uint64_t mmio_base, irq_number; - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); - VFIODevice *vbasedev = &vdev->vbasedev; - - mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0); - nodename = g_strdup_printf("%s/%s@%" PRIx64, parent_node, - vbasedev->name, mmio_base); - qemu_fdt_add_subnode(fdt, nodename); - - compat_str_len = strlen(vdev->compat) + 1; - qemu_fdt_setprop(fdt, nodename, "compatible", - vdev->compat, compat_str_len); - - qemu_fdt_setprop(fdt, nodename, "dma-coherent", "", 0); - - reg_attr = g_new(uint32_t, vbasedev->num_regions * 2); - for (i = 0; i < vbasedev->num_regions; i++) { - mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); - reg_attr[2 * i] = cpu_to_be32(mmio_base); - reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(vdev->regions[i]->mem)); - } - qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, - vbasedev->num_regions * 2 * sizeof(uint32_t)); - - irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3); - for (i = 0; i < vbasedev->num_irqs; i++) { - irq_number = platform_bus_get_irqn(pbus, sbdev , i) - + data->irq_start; - irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI); - irq_attr[3 * i + 1] = cpu_to_be32(irq_number); - irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI); - } - qemu_fdt_setprop(fdt, nodename, "interrupts", - irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t)); - g_free(irq_attr); - g_free(reg_attr); - g_free(nodename); - return 0; -} - -/* AMD xgbe properties whose values are copied/pasted from host */ -static HostProperty amd_xgbe_copied_properties[] = { - {"compatible", false}, - {"dma-coherent", true}, - {"amd,per-channel-interrupt", true}, - {"phy-mode", false}, - {"mac-address", true}, - {"amd,speed-set", false}, - {"amd,serdes-blwc", true}, - {"amd,serdes-cdr-rate", true}, - {"amd,serdes-pq-skew", true}, - {"amd,serdes-tx-amp", true}, - {"amd,serdes-dfe-tap-config", true}, - {"amd,serdes-dfe-tap-enable", true}, - {"clock-names", false}, -}; - -/** - * add_amd_xgbe_fdt_node - * - * Generates the combined xgbe/phy node following kernel >=4.2 - * binding documentation: - * Documentation/devicetree/bindings/net/amd-xgbe.txt: - * Also 2 clock nodes are created (dma and ptp) - * - * Asserts in case of error - */ -static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque) -{ - PlatformBusFDTData *data = opaque; - PlatformBusDevice *pbus = data->pbus; - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); - VFIODevice *vbasedev = &vdev->vbasedev; - VFIOINTp *intp; - const char *parent_node = data->pbus_node_name; - char **node_path, *nodename, *dt_name; - void *guest_fdt = data->fdt, *host_fdt; - const void *r; - int i, prop_len; - uint32_t *irq_attr, *reg_attr; - const uint32_t *host_clock_phandles; - uint64_t mmio_base, irq_number; - uint32_t guest_clock_phandles[2]; - - host_fdt = load_device_tree_from_sysfs(); - - dt_name = sysfs_to_dt_name(vbasedev->name); - if (!dt_name) { - error_report("%s incorrect sysfs device name %s", - __func__, vbasedev->name); - exit(1); - } - node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat, - &error_fatal); - if (!node_path || !node_path[0]) { - error_report("%s unable to retrieve node path for %s/%s", - __func__, dt_name, vdev->compat); - exit(1); - } - - if (node_path[1]) { - error_report("%s more than one node matching %s/%s!", - __func__, dt_name, vdev->compat); - exit(1); - } - - g_free(dt_name); - - if (vbasedev->num_regions != 5) { - error_report("%s Does the host dt node combine XGBE/PHY?", __func__); - exit(1); - } - - /* generate nodes for DMA_CLK and PTP_CLK */ - r = qemu_fdt_getprop(host_fdt, node_path[0], "clocks", - &prop_len, &error_fatal); - if (prop_len != 8) { - error_report("%s clocks property should contain 2 handles", __func__); - exit(1); - } - host_clock_phandles = r; - guest_clock_phandles[0] = qemu_fdt_alloc_phandle(guest_fdt); - guest_clock_phandles[1] = qemu_fdt_alloc_phandle(guest_fdt); - - /** - * clock handles fetched from host dt are in be32 layout whereas - * rest of the code uses cpu layout. Also guest clock handles are - * in cpu layout. - */ - fdt_build_clock_node(host_fdt, guest_fdt, - be32_to_cpu(host_clock_phandles[0]), - guest_clock_phandles[0]); - - fdt_build_clock_node(host_fdt, guest_fdt, - be32_to_cpu(host_clock_phandles[1]), - guest_clock_phandles[1]); - - /* combined XGBE/PHY node */ - mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0); - nodename = g_strdup_printf("%s/%s@%" PRIx64, parent_node, - vbasedev->name, mmio_base); - qemu_fdt_add_subnode(guest_fdt, nodename); - - copy_properties_from_host(amd_xgbe_copied_properties, - ARRAY_SIZE(amd_xgbe_copied_properties), - host_fdt, guest_fdt, - node_path[0], nodename); - - qemu_fdt_setprop_cells(guest_fdt, nodename, "clocks", - guest_clock_phandles[0], - guest_clock_phandles[1]); - - reg_attr = g_new(uint32_t, vbasedev->num_regions * 2); - for (i = 0; i < vbasedev->num_regions; i++) { - mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i); - reg_attr[2 * i] = cpu_to_be32(mmio_base); - reg_attr[2 * i + 1] = cpu_to_be32( - memory_region_size(vdev->regions[i]->mem)); - } - qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr, - vbasedev->num_regions * 2 * sizeof(uint32_t)); - - irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3); - for (i = 0; i < vbasedev->num_irqs; i++) { - irq_number = platform_bus_get_irqn(pbus, sbdev , i) - + data->irq_start; - irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI); - irq_attr[3 * i + 1] = cpu_to_be32(irq_number); - /* - * General device interrupt and PCS auto-negotiation interrupts are - * level-sensitive while the 4 per-channel interrupts are edge - * sensitive - */ - QLIST_FOREACH(intp, &vdev->intp_list, next) { - if (intp->pin == i) { - break; - } - } - if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) { - irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI); - } else { - irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI); - } - } - qemu_fdt_setprop(guest_fdt, nodename, "interrupts", - irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t)); - - g_free(host_fdt); - g_strfreev(node_path); - g_free(irq_attr); - g_free(reg_attr); - g_free(nodename); - return 0; -} - -/* DT compatible matching */ -static bool vfio_platform_match(SysBusDevice *sbdev, - const BindingEntry *entry) -{ - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); - const char *compat; - unsigned int n; - - for (n = vdev->num_compat, compat = vdev->compat; n > 0; - n--, compat += strlen(compat) + 1) { - if (!strcmp(entry->compat, compat)) { - return true; - } - } - - return false; -} - -#define VFIO_PLATFORM_BINDING(compat, add_fn) \ - {TYPE_VFIO_PLATFORM, (compat), (add_fn), vfio_platform_match} - -#endif /* CONFIG_LINUX */ - #ifdef CONFIG_TPM /* * add_tpm_tis_fdt_node: Create a DT node for TPM TIS @@ -511,11 +133,6 @@ static bool type_match(SysBusDevice *sbdev, const BindingEntry *entry) /* list of supported dynamic sysbus bindings */ static const BindingEntry bindings[] = { -#ifdef CONFIG_LINUX - TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node), - TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), - VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), -#endif #ifdef CONFIG_TPM TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node), #endif diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c index 820e67ac8b..1bb2ee45a0 100644 --- a/hw/display/bcm2835_fb.c +++ b/hw/display/bcm2835_fb.c @@ -27,6 +27,7 @@ #include "hw/display/bcm2835_fb.h" #include "hw/hw.h" #include "hw/irq.h" +#include "ui/console.h" #include "framebuffer.h" #include "ui/pixel_ops.h" #include "hw/misc/bcm2835_mbox_defs.h" diff --git a/hw/mips/malta.c b/hw/mips/malta.c index 344dc8ca76..02da629b5a 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -1191,7 +1191,7 @@ void mips_malta_init(MachineState *machine) * In little endian mode the 32bit words in the bios are swapped, * a neat trick which allows bi-endian firmware. */ - if (!TARGET_BIG_ENDIAN) { + if (!TARGET_BIG_ENDIAN && bios_size > 0) { uint32_t *end, *addr; const size_t swapsize = MIN(bios_size, 0x3e0000); addr = rom_ptr(FLASH_ADDRESS, swapsize); diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index e318d0d912..7e1c71ef59 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -106,7 +106,7 @@ static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) out: vfio_address_space_put(space); - return container_of(bcontainer, VFIOContainer, bcontainer); + return VFIO_IOMMU_LEGACY(bcontainer); } static bool vfio_eeh_as_ok(AddressSpace *as) diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c index aaf91319b4..938a551171 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -62,7 +62,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, { S390PCIDMACount *cnt; uint32_t avail; - VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vpdev = VFIO_PCI_BASE(pbdev->pdev); int id; assert(vpdev); @@ -108,7 +108,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); uint64_t vfio_size; hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); @@ -162,7 +162,7 @@ static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); @@ -185,7 +185,7 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, struct vfio_device_info_cap_zpci_group *cap; S390pciState *s = s390_get_phb(); ClpRspQueryPciGrp *resgrp; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); uint8_t start_gid = pbdev->zpci_fn.pfgid; hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); @@ -264,7 +264,7 @@ static void s390_pci_read_util(S390PCIBusDevice *pbdev, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_util *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL); @@ -291,7 +291,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, { struct vfio_info_cap_header *hdr; struct vfio_device_info_cap_zpci_pfip *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vpci = VFIO_PCI_BASE(pbdev->pdev); hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP); @@ -314,7 +314,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) { - VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + VFIOPCIDevice *vfio_pci = VFIO_PCI_BASE(pbdev->pdev); return vfio_get_device_info(vfio_pci->vbasedev.fd); } diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c index d589dd90f5..3cdbd44c1a 100644 --- a/hw/vfio-user/container.c +++ b/hw/vfio-user/container.c @@ -24,16 +24,14 @@ */ static void vfio_user_listener_begin(VFIOContainerBase *bcontainer) { - VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, - bcontainer); + VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); container->proxy->async_ops = true; } static void vfio_user_listener_commit(VFIOContainerBase *bcontainer) { - VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, - bcontainer); + VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); /* wait here for any async requests sent during the transaction */ container->proxy->async_ops = false; @@ -44,8 +42,8 @@ static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all) { - VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, - bcontainer); + VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); + Error *local_err = NULL; int ret = 0; @@ -86,8 +84,8 @@ static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mrp) { - VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, - bcontainer); + VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); + int fd = memory_region_get_fd(mrp); Error *local_err = NULL; int ret = 0; @@ -173,8 +171,7 @@ static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer, static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp) { - VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer, - bcontainer); + VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer); assert(container->proxy->dma_pgsizes != 0); bcontainer->pgsizes = container->proxy->dma_pgsizes; @@ -218,7 +215,7 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev, goto put_space_exit; } - bcontainer = &container->bcontainer; + bcontainer = VFIO_IOMMU(container); ret = ram_block_uncoordinated_discard_disable(true); if (ret) { @@ -263,7 +260,7 @@ put_space_exit: static void vfio_user_container_disconnect(VFIOUserContainer *container) { - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); VFIOAddressSpace *space = bcontainer->space; @@ -291,7 +288,7 @@ static bool vfio_user_device_get(VFIOUserContainer *container, vbasedev->fd = -1; - vfio_device_prepare(vbasedev, &container->bcontainer, &info); + vfio_device_prepare(vbasedev, VFIO_IOMMU(container), &info); return true; } @@ -315,8 +312,7 @@ static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev, static void vfio_user_device_detach(VFIODevice *vbasedev) { - VFIOUserContainer *container = container_of(vbasedev->bcontainer, - VFIOUserContainer, bcontainer); + VFIOUserContainer *container = VFIO_IOMMU_USER(vbasedev->bcontainer); vfio_device_unprepare(vbasedev); diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h index 2bb1fa1343..96aa6785d9 100644 --- a/hw/vfio-user/container.h +++ b/hw/vfio-user/container.h @@ -13,10 +13,11 @@ #include "hw/vfio-user/proxy.h" /* MMU container sub-class for vfio-user. */ -typedef struct VFIOUserContainer { - VFIOContainerBase bcontainer; +struct VFIOUserContainer { + VFIOContainerBase parent_obj; + VFIOUserProxy *proxy; -} VFIOUserContainer; +}; OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER); diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c index be71c77729..e2c309784f 100644 --- a/hw/vfio-user/pci.c +++ b/hw/vfio-user/pci.c @@ -20,7 +20,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) struct VFIOUserPCIDevice { - VFIOPCIDevice device; + VFIOPCIDevice parent_obj; + SocketAddress *socket; bool send_queued; /* all sends are queued */ uint32_t wait_time; /* timeout for message replies */ @@ -64,7 +65,7 @@ static void vfio_user_msix_setup(VFIOPCIDevice *vdev) vdev->msix->pba_region = pba_reg; vfio_reg = vdev->bars[vdev->msix->pba_bar].mr; - msix_reg = &vdev->pdev.msix_pba_mmio; + msix_reg = &PCI_DEVICE(vdev)->msix_pba_mmio; memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev, "VFIO MSIX PBA", int128_get64(msix_reg->size)); memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset, @@ -85,7 +86,7 @@ static void vfio_user_msix_teardown(VFIOPCIDevice *vdev) static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); VFIOUserProxy *proxy = vdev->vbasedev.proxy; VFIOUserDMARW *res; MemTxResult r; @@ -133,7 +134,7 @@ static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); VFIOUserProxy *proxy = vdev->vbasedev.proxy; MemTxResult r; @@ -213,8 +214,9 @@ static void vfio_user_compute_needs_reset(VFIODevice *vbasedev) static Object *vfio_user_pci_get_object(VFIODevice *vbasedev) { - VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice, - device.vbasedev); + VFIOUserPCIDevice *vdev = VFIO_USER_PCI(container_of(vbasedev, + VFIOPCIDevice, + vbasedev)); return OBJECT(vdev); } @@ -406,6 +408,8 @@ static const Property vfio_user_pci_dev_properties[] = { sub_vendor_id, PCI_ANY_ID), DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice, sub_device_id, PCI_ANY_ID), + DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice, + class_code, PCI_ANY_ID), DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000), DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false), @@ -417,7 +421,7 @@ static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name, VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj); bool success; - if (udev->device.vbasedev.proxy) { + if (VFIO_PCI_BASE(udev)->vbasedev.proxy) { error_setg(errp, "Proxy is connected"); return; } diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig index 91d9023b79..27de24e4db 100644 --- a/hw/vfio/Kconfig +++ b/hw/vfio/Kconfig @@ -17,22 +17,6 @@ config VFIO_CCW select VFIO depends on LINUX && S390_CCW_VIRTIO -config VFIO_PLATFORM - bool - default y - select VFIO - depends on LINUX && PLATFORM_BUS - -config VFIO_XGMAC - bool - default y - depends on VFIO_PLATFORM - -config VFIO_AMD_XGBE - bool - default y - depends on VFIO_PLATFORM - config VFIO_AP bool default y diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c deleted file mode 100644 index 58f590e385..0000000000 --- a/hw/vfio/amd-xgbe.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * AMD XGBE VFIO device - * - * Copyright Linaro Limited, 2015 - * - * Authors: - * Eric Auger <eric.auger@linaro.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "hw/vfio/vfio-amd-xgbe.h" -#include "migration/vmstate.h" -#include "qemu/module.h" -#include "qemu/error-report.h" - -static void amd_xgbe_realize(DeviceState *dev, Error **errp) -{ - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); - VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev); - - warn_report("-device vfio-amd-xgbe is deprecated"); - vdev->compat = g_strdup("amd,xgbe-seattle-v1a"); - vdev->num_compat = 1; - - k->parent_realize(dev, errp); -} - -static const VMStateDescription vfio_platform_amd_xgbe_vmstate = { - .name = "vfio-amd-xgbe", - .unmigratable = 1, -}; - -static void vfio_amd_xgbe_class_init(ObjectClass *klass, const void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - VFIOAmdXgbeDeviceClass *vcxc = - VFIO_AMD_XGBE_DEVICE_CLASS(klass); - device_class_set_parent_realize(dc, amd_xgbe_realize, - &vcxc->parent_realize); - dc->desc = "VFIO AMD XGBE"; - dc->vmsd = &vfio_platform_amd_xgbe_vmstate; -} - -static const TypeInfo vfio_amd_xgbe_dev_info = { - .name = TYPE_VFIO_AMD_XGBE, - .parent = TYPE_VFIO_PLATFORM, - .instance_size = sizeof(VFIOAmdXgbeDevice), - .class_init = vfio_amd_xgbe_class_init, - .class_size = sizeof(VFIOAmdXgbeDeviceClass), -}; - -static void register_amd_xgbe_dev_type(void) -{ - type_register_static(&vfio_amd_xgbe_dev_info); -} - -type_init(register_amd_xgbe_dev_type) diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c deleted file mode 100644 index 03f2ff5763..0000000000 --- a/hw/vfio/calxeda-xgmac.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * calxeda xgmac VFIO device - * - * Copyright Linaro Limited, 2014 - * - * Authors: - * Eric Auger <eric.auger@linaro.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "hw/vfio/vfio-calxeda-xgmac.h" -#include "migration/vmstate.h" -#include "qemu/module.h" -#include "qemu/error-report.h" - -static void calxeda_xgmac_realize(DeviceState *dev, Error **errp) -{ - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); - VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev); - - warn_report("-device vfio-calxeda-xgmac is deprecated"); - vdev->compat = g_strdup("calxeda,hb-xgmac"); - vdev->num_compat = 1; - - k->parent_realize(dev, errp); -} - -static const VMStateDescription vfio_platform_calxeda_xgmac_vmstate = { - .name = "vfio-calxeda-xgmac", - .unmigratable = 1, -}; - -static void vfio_calxeda_xgmac_class_init(ObjectClass *klass, const void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - VFIOCalxedaXgmacDeviceClass *vcxc = - VFIO_CALXEDA_XGMAC_DEVICE_CLASS(klass); - device_class_set_parent_realize(dc, calxeda_xgmac_realize, - &vcxc->parent_realize); - dc->desc = "VFIO Calxeda XGMAC"; - dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate; -} - -static const TypeInfo vfio_calxeda_xgmac_dev_info = { - .name = TYPE_VFIO_CALXEDA_XGMAC, - .parent = TYPE_VFIO_PLATFORM, - .instance_size = sizeof(VFIOCalxedaXgmacDevice), - .class_init = vfio_calxeda_xgmac_class_init, - .class_size = sizeof(VFIOCalxedaXgmacDeviceClass), -}; - -static void register_calxeda_xgmac_dev_type(void) -{ - type_register_static(&vfio_calxeda_xgmac_dev_info); -} - -type_init(register_calxeda_xgmac_dev_type) diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 3e13feaa74..030c6d3f89 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -71,7 +71,7 @@ static int vfio_dma_unmap_bitmap(const VFIOContainer *container, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) { - const VFIOContainerBase *bcontainer = &container->bcontainer; + const VFIOContainerBase *bcontainer = VFIO_IOMMU(container); struct vfio_iommu_type1_dma_unmap *unmap; struct vfio_bitmap *bitmap; VFIOBitmap vbmap; @@ -124,8 +124,7 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) { - const VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); struct vfio_iommu_type1_dma_unmap unmap = { .argsz = sizeof(unmap), .flags = 0, @@ -213,8 +212,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mr) { - const VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); struct vfio_iommu_type1_dma_map map = { .argsz = sizeof(map), .flags = VFIO_DMA_MAP_FLAG_READ, @@ -246,8 +244,7 @@ static int vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, bool start, Error **errp) { - const VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); int ret; struct vfio_iommu_type1_dirty_bitmap dirty = { .argsz = sizeof(dirty), @@ -272,8 +269,7 @@ vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp) { - const VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); struct vfio_iommu_type1_dirty_bitmap *dbitmap; struct vfio_iommu_type1_dirty_bitmap_get *range; int ret; @@ -495,7 +491,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, { struct vfio_info_cap_header *hdr; struct vfio_iommu_type1_info_cap_migration *cap_mig; - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); if (!hdr) { @@ -518,8 +514,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); g_autofree struct vfio_iommu_type1_info *info = NULL; int ret; @@ -634,7 +629,7 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, if (!cpr_is_incoming()) { QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOContainer, bcontainer); + container = VFIO_IOMMU_LEGACY(bcontainer); if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { return vfio_container_group_add(container, group, errp); } @@ -652,7 +647,7 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, * create the container struct and group list. */ QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOContainer, bcontainer); + container = VFIO_IOMMU_LEGACY(bcontainer); if (vfio_cpr_container_match(container, group, fd)) { return vfio_container_group_add(container, group, errp); @@ -672,7 +667,7 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, goto fail; } new_container = true; - bcontainer = &container->bcontainer; + bcontainer = VFIO_IOMMU(container); if (!vfio_legacy_cpr_register_container(container, errp)) { goto fail; @@ -735,7 +730,7 @@ fail: static void vfio_container_disconnect(VFIOGroup *group) { VFIOContainer *container = group->container; - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); QLIST_REMOVE(group, container_next); @@ -781,7 +776,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) QLIST_FOREACH(group, &vfio_group_list, next) { if (group->groupid == groupid) { /* Found it. Now is it already in the right context? */ - if (group->container->bcontainer.space->as == as) { + if (VFIO_IOMMU(group->container)->space->as == as) { return group; } else { error_setg(errp, "group %d used in multiple address spaces", @@ -895,7 +890,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, } } - vfio_device_prepare(vbasedev, &group->container->bcontainer, info); + vfio_device_prepare(vbasedev, VFIO_IOMMU(group->container), info); vbasedev->fd = fd; vbasedev->group = group; @@ -1087,7 +1082,7 @@ static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) /* Prep dependent devices for reset and clear our marker. */ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (!vbasedev_iter->dev->realized || - vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { + !vfio_pci_from_vfio_device(vbasedev_iter)) { continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); @@ -1172,7 +1167,7 @@ out: QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (!vbasedev_iter->dev->realized || - vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { + !vfio_pci_from_vfio_device(vbasedev_iter)) { continue; } tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c index 553b203e9b..8f437194fa 100644 --- a/hw/vfio/cpr-legacy.c +++ b/hw/vfio/cpr-legacy.c @@ -41,8 +41,8 @@ static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, bool readonly, MemoryRegion *mr) { - const VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + const VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); + struct vfio_iommu_type1_dma_map map = { .argsz = sizeof(map), .flags = VFIO_DMA_MAP_FLAG_VADDR, @@ -65,7 +65,7 @@ static void vfio_region_remap(MemoryListener *listener, { VFIOContainer *container = container_of(listener, VFIOContainer, cpr.remap_listener); - vfio_container_region_add(&container->bcontainer, section, true); + vfio_container_region_add(VFIO_IOMMU(container), section, true); } static bool vfio_cpr_supported(VFIOContainer *container, Error **errp) @@ -98,7 +98,7 @@ static int vfio_container_pre_save(void *opaque) static int vfio_container_post_load(void *opaque, int version_id) { VFIOContainer *container = opaque; - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); dma_map_fn saved_dma_map = vioc->dma_map; Error *local_err = NULL; @@ -135,7 +135,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, { VFIOContainer *container = container_of(notifier, VFIOContainer, cpr.transfer_notifier); - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); if (e->type != MIG_EVENT_PRECOPY_FAILED) { return 0; @@ -167,7 +167,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) { - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); Error **cpr_blocker = &container->cpr.blocker; migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, @@ -191,7 +191,7 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) void vfio_legacy_cpr_unregister_container(VFIOContainer *container) { - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); migration_remove_notifier(&bcontainer->cpr_reboot_notifier); migrate_del_blocker(&container->cpr.blocker); diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index a831243e02..2c71fc1e8e 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -56,7 +56,7 @@ static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors, { int i, fd; bool pending = false; - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); vdev->nr_vectors = nr_vectors; vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors); @@ -99,7 +99,7 @@ static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors, static int vfio_cpr_pci_pre_load(void *opaque) { VFIOPCIDevice *vdev = opaque; - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); int size = MIN(pci_config_size(pdev), vdev->config_size); int i; @@ -113,7 +113,7 @@ static int vfio_cpr_pci_pre_load(void *opaque) static int vfio_cpr_pci_post_load(void *opaque, int version_id) { VFIOPCIDevice *vdev = opaque; - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); int nr_vectors; vfio_sub_page_bar_update_mappings(vdev); @@ -173,8 +173,8 @@ const VMStateDescription vfio_cpr_pci_vmstate = { .post_load = vfio_cpr_pci_post_load, .needed = cpr_incoming_needed, .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), - VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present), + VMSTATE_PCI_DEVICE(parent_obj, VFIOPCIDevice), + VMSTATE_MSIX_TEST(parent_obj, VFIOPCIDevice, pci_msix_present), VMSTATE_VFIO_INTX(intx, VFIOPCIDevice), VMSTATE_END_OF_LIST() } @@ -214,7 +214,7 @@ static int set_irqfd_notifier_gsi(KVMState *s, EventNotifier *n, static int vfio_cpr_set_msi_virq(VFIOPCIDevice *vdev, Error **errp, bool enable) { const char *op = (enable ? "enable" : "disable"); - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); int i, nr_vectors, ret = 0; if (msix_enabled(pdev)) { diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 52a1996dc4..08f12ac31f 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -129,7 +129,7 @@ static inline const char *action_to_str(int action) static const char *index_to_str(VFIODevice *vbasedev, int index) { - if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + if (!vfio_pci_from_vfio_device(vbasedev)) { return NULL; } diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index ee0767b0b8..4bfa2e0fcd 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -200,7 +200,7 @@ static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev, } /* Hotplugging is not supported for opregion access */ - if (vdev->pdev.qdev.hotplugged) { + if (DEVICE(vdev)->hotplugged) { warn_report("IGD device detected, but OpRegion is not supported " "on hotplugged device."); return false; @@ -260,11 +260,12 @@ static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev, static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev, struct vfio_region_info *info) { + PCIDevice *pdev = PCI_DEVICE(vdev); PCIBus *bus; PCIDevice *host_bridge; int ret; - bus = pci_device_root_bus(&vdev->pdev); + bus = pci_device_root_bus(pdev); host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0)); if (!host_bridge) { @@ -327,13 +328,14 @@ type_init(vfio_pci_igd_register_types) static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev, struct vfio_region_info *info) { + PCIDevice *pdev = PCI_DEVICE(vdev); PCIDevice *lpc_bridge; int ret; - lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), + lpc_bridge = pci_find_device(pci_device_root_bus(pdev), 0, PCI_DEVFN(0x1f, 0)); if (!lpc_bridge) { - lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev), + lpc_bridge = pci_create_simple(pci_device_root_bus(pdev), PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge"); } @@ -350,13 +352,14 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) { struct vfio_region_info *host = NULL; struct vfio_region_info *lpc = NULL; + PCIDevice *pdev = PCI_DEVICE(vdev); PCIDevice *lpc_bridge; int ret; /* * Copying IDs or creating new devices are not supported on hotplug */ - if (vdev->pdev.qdev.hotplugged) { + if (DEVICE(vdev)->hotplugged) { error_setg(errp, "IGD LPC is not supported on hotplugged device"); return false; } @@ -366,7 +369,7 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp) * can stuff host values into, so if there's already one there and it's not * one we can hack on, this quirk is no-go. Sorry Q35. */ - lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev), + lpc_bridge = pci_find_device(pci_device_root_bus(pdev), 0, PCI_DEVFN(0x1f, 0)); if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge), "vfio-pci-igd-lpc-bridge")) { @@ -460,7 +463,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) int gen; if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev) || nr != 0) { + !vfio_is_base_display(vdev) || nr != 0) { return; } @@ -510,6 +513,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) { struct vfio_region_info *opregion = NULL; + PCIDevice *pdev = PCI_DEVICE(vdev); int ret, gen; uint64_t gms_size = 0; uint64_t *bdsm_size; @@ -518,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) Error *err = NULL; if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev)) { + !vfio_is_base_display(vdev)) { return true; } @@ -529,21 +533,22 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) info_report("OpRegion detected on Intel display %x.", vdev->device_id); gen = igd_gen(vdev); - gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); + gmch = vfio_pci_read_config(pdev, IGD_GMCH, 4); /* * For backward compatibility, enable legacy mode when * - Device geneation is 6 to 9 (including both) - * - IGD claims VGA cycles on host + * - IGD exposes itself as VGA controller and claims VGA cycles on host * - Machine type is i440fx (pc_piix) * - IGD device is at guest BDF 00:02.0 * - Not manually disabled by x-igd-legacy-mode=off */ if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) && + vfio_is_vga(vdev) && (gen >= 6 && gen <= 9) && !(gmch & IGD_GMCH_VGA_DISABLE) && !strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") && - (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev), + (pdev == pci_find_device(pci_device_root_bus(pdev), 0, PCI_DEVFN(0x2, 0)))) { /* * IGD legacy mode requires: @@ -565,7 +570,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) */ ret = vfio_device_get_region_info(&vdev->vbasedev, VFIO_PCI_ROM_REGION_INDEX, &rom); - if ((ret || !rom->size) && !vdev->pdev.romfile) { + if ((ret || !rom->size) && !pdev->romfile) { error_setg(&err, "Device has no ROM"); goto error; } @@ -610,8 +615,8 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) * ASLS (OpRegion address) is read-only, emulated * It contains HPA, guest firmware need to reprogram it with GPA. */ - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); + pci_set_long(pdev->config + IGD_ASLS, 0); + pci_set_long(pdev->wmask + IGD_ASLS, ~0); pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); /* @@ -625,8 +630,8 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) } /* GMCH is read-only, emulated */ - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); - pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); + pci_set_long(pdev->config + IGD_GMCH, gmch); + pci_set_long(pdev->wmask + IGD_GMCH, 0); pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); } @@ -635,12 +640,12 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) /* BDSM is read-write, emulated. BIOS needs to be able to write it */ if (gen < 11) { - pci_set_long(vdev->pdev.config + IGD_BDSM, 0); - pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); + pci_set_long(pdev->config + IGD_BDSM, 0); + pci_set_long(pdev->wmask + IGD_BDSM, ~0); pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0); } else { - pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0); - pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0); + pci_set_quad(pdev->config + IGD_BDSM_GEN11, 0); + pci_set_quad(pdev->wmask + IGD_BDSM_GEN11, ~0); pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0); } } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 48c590b6a9..8c27222f75 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -737,8 +737,8 @@ iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev, } vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid); - if (!vbasedev_tmp || !vbasedev_tmp->dev->realized || - vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) { + if (!vfio_pci_from_vfio_device(vbasedev_tmp) || + !vbasedev_tmp->dev->realized) { return NULL; } diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index f498e23a93..e093833165 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -250,8 +250,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, return 0; } -static void vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer, - MemoryRegionSection *section) +static bool vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, + Error **errp) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); int target_page_size = qemu_target_page_size(); @@ -316,13 +317,15 @@ static void vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer, if (vrdl_mappings + max_memslots - vrdl_count > bcontainer->dma_max_mappings) { - warn_report("%s: possibly running out of DMA mappings. E.g., try" + error_setg(errp, "%s: possibly running out of DMA mappings. E.g., try" " increasing the 'block-size' of virtio-mem devies." " Maximum possible DMA mappings: %d, Maximum possible" " memslots: %d", __func__, bcontainer->dma_max_mappings, max_memslots); + return false; } } + return true; } static void vfio_ram_discard_unregister_listener(VFIOContainerBase *bcontainer, @@ -450,7 +453,7 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) * MMIO region mapping failures are not fatal but in this case PCI * peer-to-peer transactions are broken. */ - if (vbasedev && vbasedev->type == VFIO_DEVICE_TYPE_PCI) { + if (vfio_pci_from_vfio_device(vbasedev)) { error_append_hint(errp, "%s: PCI peer-to-peer transactions " "on BARs are not supported.\n", vbasedev->name); } @@ -571,9 +574,14 @@ void vfio_container_region_add(VFIOContainerBase *bcontainer, */ if (memory_region_has_ram_discard_manager(section->mr)) { if (!cpr_remap) { - vfio_ram_discard_register_listener(bcontainer, section); + if (!vfio_ram_discard_register_listener(bcontainer, section, &err)) { + goto fail; + } } else if (!vfio_cpr_ram_discard_register_listener(bcontainer, section)) { + error_setg(&err, + "vfio_cpr_ram_discard_register_listener for %s failed", + memory_region_name(section->mr)); goto fail; } return; @@ -751,7 +759,7 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, owner = memory_region_owner(section->mr); QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { - if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + if (!vfio_pci_from_vfio_device(vbasedev)) { continue; } pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev); diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index bfaf6be805..d3ed3cb7ac 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -13,14 +13,11 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( 'pci.c', )) vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c')) -vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c')) vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c')) vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c')) specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss) -system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) -system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) system_ss.add(when: 'CONFIG_VFIO', if_true: files( 'cpr.c', 'cpr-legacy.c', diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 3f002252ac..c97606dbf1 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -113,6 +113,7 @@ static uint64_t vfio_generic_window_quirk_data_read(void *opaque, { VFIOConfigWindowQuirk *window = opaque; VFIOPCIDevice *vdev = window->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); uint64_t data; /* Always read data reg, discard if window enabled */ @@ -120,7 +121,7 @@ static uint64_t vfio_generic_window_quirk_data_read(void *opaque, addr + window->data_offset, size); if (window->window_enabled) { - data = vfio_pci_read_config(&vdev->pdev, window->address_val, size); + data = vfio_pci_read_config(pdev, window->address_val, size); trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name, memory_region_name(window->data_mem), data); } @@ -133,9 +134,10 @@ static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr, { VFIOConfigWindowQuirk *window = opaque; VFIOPCIDevice *vdev = window->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); if (window->window_enabled) { - vfio_pci_write_config(&vdev->pdev, window->address_val, data, size); + vfio_pci_write_config(pdev, window->address_val, data, size); trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name, memory_region_name(window->data_mem), data); return; @@ -156,6 +158,7 @@ static uint64_t vfio_generic_quirk_mirror_read(void *opaque, { VFIOConfigMirrorQuirk *mirror = opaque; VFIOPCIDevice *vdev = mirror->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); uint64_t data; /* Read and discard in case the hardware cares */ @@ -163,7 +166,7 @@ static uint64_t vfio_generic_quirk_mirror_read(void *opaque, addr + mirror->offset, size); addr += mirror->config_offset; - data = vfio_pci_read_config(&vdev->pdev, addr, size); + data = vfio_pci_read_config(pdev, addr, size); trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name, memory_region_name(mirror->mem), addr, data); @@ -175,9 +178,10 @@ static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr, { VFIOConfigMirrorQuirk *mirror = opaque; VFIOPCIDevice *vdev = mirror->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); addr += mirror->config_offset; - vfio_pci_write_config(&vdev->pdev, addr, data, size); + vfio_pci_write_config(pdev, addr, data, size); trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name, memory_region_name(mirror->mem), addr, data); @@ -211,7 +215,8 @@ static uint64_t vfio_ati_3c3_quirk_read(void *opaque, hwaddr addr, unsigned size) { VFIOPCIDevice *vdev = opaque; - uint64_t data = vfio_pci_read_config(&vdev->pdev, + PCIDevice *pdev = PCI_DEVICE(vdev); + uint64_t data = vfio_pci_read_config(pdev, PCI_BASE_ADDRESS_4 + 1, size); trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data); @@ -563,6 +568,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, { VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); VFIONvidia3d0State old_state = quirk->state; uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI], addr + 0x10, size); @@ -573,7 +579,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque, (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) { uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1); - data = vfio_pci_read_config(&vdev->pdev, offset, size); + data = vfio_pci_read_config(pdev, offset, size); trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name, offset, size, data); } @@ -586,6 +592,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, { VFIONvidia3d0Quirk *quirk = opaque; VFIOPCIDevice *vdev = quirk->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); VFIONvidia3d0State old_state = quirk->state; quirk->state = NONE; @@ -599,7 +606,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr, if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) { uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1); - vfio_pci_write_config(&vdev->pdev, offset, data, size); + vfio_pci_write_config(pdev, offset, data, size); trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name, offset, data, size); return; @@ -815,7 +822,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, { VFIOConfigMirrorQuirk *mirror = opaque; VFIOPCIDevice *vdev = mirror->vdev; - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); LastDataSet *last = (LastDataSet *)&mirror->data; vfio_generic_quirk_mirror_write(opaque, addr, data, size); @@ -1005,6 +1012,7 @@ static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr, { VFIOrtl8168Quirk *rtl = opaque; VFIOPCIDevice *vdev = rtl->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); rtl->enabled = false; @@ -1013,7 +1021,7 @@ static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr, rtl->addr = (uint32_t)data; if (data & 0x80000000U) { /* Do write */ - if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) { + if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { hwaddr offset = data & 0xfff; uint64_t val = rtl->data; @@ -1021,7 +1029,7 @@ static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr, (uint16_t)offset, val); /* Write to the proper guest MSI-X table instead */ - memory_region_dispatch_write(&vdev->pdev.msix_table_mmio, + memory_region_dispatch_write(&pdev->msix_table_mmio, offset, val, size_memop(size) | MO_LE, MEMTXATTRS_UNSPECIFIED); @@ -1049,11 +1057,12 @@ static uint64_t vfio_rtl8168_quirk_data_read(void *opaque, { VFIOrtl8168Quirk *rtl = opaque; VFIOPCIDevice *vdev = rtl->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size); - if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { + if (rtl->enabled && (pdev->cap_present & QEMU_PCI_CAP_MSIX)) { hwaddr offset = rtl->addr & 0xfff; - memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset, + memory_region_dispatch_read(&pdev->msix_table_mmio, offset, &data, size_memop(size) | MO_LE, MEMTXATTRS_UNSPECIFIED); trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data); @@ -1297,7 +1306,7 @@ static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev) static int vfio_radeon_reset(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); int i, ret = 0; uint32_t data; @@ -1454,7 +1463,7 @@ static bool is_valid_std_cap_offset(uint8_t pos) static bool vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) { ERRP_GUARD(); - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); int ret, pos; bool c8_conflict = false, d4_conflict = false; uint8_t tmp; @@ -1547,6 +1556,7 @@ static bool vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp) { ERRP_GUARD(); + PCIDevice *pdev = PCI_DEVICE(vdev); uint8_t membar_phys[16]; int ret, pos = 0xE8; @@ -1565,7 +1575,7 @@ static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp) return false; } - ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos, + ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, VMD_SHADOW_CAP_LEN, errp); if (ret < 0) { error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: "); @@ -1574,10 +1584,10 @@ static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp) memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN); pos += PCI_CAP_FLAGS; - pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN); - pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER); - pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */ - memcpy(vdev->pdev.config + pos + 4, membar_phys, 16); + pci_set_byte(pdev->config + pos++, VMD_SHADOW_CAP_LEN); + pci_set_byte(pdev->config + pos++, VMD_SHADOW_CAP_VER); + pci_set_long(pdev->config + pos, 0x53484457); /* SHDW */ + memcpy(pdev->config + pos + 4, membar_phys, 16); return true; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 07257d0fa0..d14e96b2f8 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -117,6 +117,7 @@ static void vfio_intx_mmap_enable(void *opaque) static void vfio_intx_interrupt(void *opaque) { VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = PCI_DEVICE(vdev); if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) { return; @@ -125,7 +126,7 @@ static void vfio_intx_interrupt(void *opaque) trace_vfio_intx_interrupt(vdev->vbasedev.name, 'A' + vdev->intx.pin); vdev->intx.pending = true; - pci_irq_assert(&vdev->pdev); + pci_irq_assert(pdev); vfio_mmap_set_enabled(vdev, false); if (vdev->intx.mmap_timeout) { timer_mod(vdev->intx.mmap_timer, @@ -136,6 +137,7 @@ static void vfio_intx_interrupt(void *opaque) void vfio_pci_intx_eoi(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + PCIDevice *pdev = PCI_DEVICE(vdev); if (!vdev->intx.pending) { return; @@ -144,13 +146,14 @@ void vfio_pci_intx_eoi(VFIODevice *vbasedev) trace_vfio_pci_intx_eoi(vbasedev->name); vdev->intx.pending = false; - pci_irq_deassert(&vdev->pdev); + pci_irq_deassert(pdev); vfio_device_irq_unmask(vbasedev, VFIO_PCI_INTX_IRQ_INDEX); } static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) { #ifdef CONFIG_KVM + PCIDevice *pdev = PCI_DEVICE(vdev); int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt); if (vdev->no_kvm_intx || !kvm_irqfds_enabled() || @@ -163,7 +166,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) qemu_set_fd_handler(irq_fd, NULL, NULL, vdev); vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; - pci_irq_deassert(&vdev->pdev); + pci_irq_deassert(pdev); /* Get an eventfd for resample/unmask */ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { @@ -241,6 +244,8 @@ static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) { #ifdef CONFIG_KVM + PCIDevice *pdev = PCI_DEVICE(vdev); + if (!vdev->intx.kvm_accel) { return; } @@ -251,7 +256,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) */ vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; - pci_irq_deassert(&vdev->pdev); + pci_irq_deassert(pdev); /* Tell KVM to stop listening for an INTx irqfd */ if (kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt, @@ -307,7 +312,7 @@ static void vfio_intx_routing_notifier(PCIDevice *pdev) return; } - route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin); + route = pci_device_route_intx_to_irq(pdev, vdev->intx.pin); if (pci_intx_route_changed(&vdev->intx.route, &route)) { vfio_intx_update(vdev, &route); @@ -324,7 +329,8 @@ static void vfio_irqchip_change(Notifier *notify, void *data) static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) { - uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); + PCIDevice *pdev = PCI_DEVICE(vdev); + uint8_t pin = vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1); Error *err = NULL; int32_t fd; @@ -342,7 +348,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) } vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */ - pci_config_set_interrupt_pin(vdev->pdev.config, pin); + pci_config_set_interrupt_pin(pdev->config, pin); #ifdef CONFIG_KVM /* @@ -350,7 +356,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) * where we won't actually use the result anyway. */ if (kvm_irqfds_enabled() && kvm_resamplefds_enabled()) { - vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, + vdev->intx.route = pci_device_route_intx_to_irq(pdev, vdev->intx.pin); } #endif @@ -390,13 +396,14 @@ skip_signaling: static void vfio_intx_disable(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); int fd; timer_del(vdev->intx.mmap_timer); vfio_intx_disable_kvm(vdev); vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); vdev->intx.pending = false; - pci_irq_deassert(&vdev->pdev); + pci_irq_deassert(pdev); vfio_mmap_set_enabled(vdev, true); fd = event_notifier_get_fd(&vdev->intx.interrupt); @@ -428,6 +435,7 @@ static void vfio_msi_interrupt(void *opaque) { VFIOMSIVector *vector = opaque; VFIOPCIDevice *vdev = vector->vdev; + PCIDevice *pdev = PCI_DEVICE(vdev); MSIMessage (*get_msg)(PCIDevice *dev, unsigned vector); void (*notify)(PCIDevice *dev, unsigned vector); MSIMessage msg; @@ -442,9 +450,9 @@ static void vfio_msi_interrupt(void *opaque) notify = msix_notify; /* A masked vector firing needs to use the PBA, enable it */ - if (msix_is_masked(&vdev->pdev, nr)) { + if (msix_is_masked(pdev, nr)) { set_bit(nr, vdev->msix->pending); - memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, true); + memory_region_set_enabled(&pdev->msix_pba_mmio, true); trace_vfio_msix_pba_enable(vdev->vbasedev.name); } } else if (vdev->interrupt == VFIO_INT_MSI) { @@ -454,9 +462,9 @@ static void vfio_msi_interrupt(void *opaque) abort(); } - msg = get_msg(&vdev->pdev, nr); + msg = get_msg(pdev, nr); trace_vfio_msi_interrupt(vdev->vbasedev.name, nr, msg.address, msg.data); - notify(&vdev->pdev, nr); + notify(pdev, nr); } void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr, bool enable) @@ -495,6 +503,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) { + PCIDevice *pdev = PCI_DEVICE(vdev); struct vfio_irq_set *irq_set; int ret = 0, i, argsz; int32_t *fds; @@ -537,7 +546,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) */ if (vdev->msi_vectors[i].use) { if (vdev->msi_vectors[i].virq < 0 || - (msix && msix_is_masked(&vdev->pdev, i))) { + (msix && msix_is_masked(pdev, i))) { fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); } else { fd = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt); @@ -557,12 +566,14 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, int vector_n, bool msix) { + PCIDevice *pdev = PCI_DEVICE(vdev); + if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; } vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change, - vector_n, &vdev->pdev); + vector_n, pdev); } static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr) @@ -631,7 +642,7 @@ static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector, void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr) { VFIOMSIVector *vector = &vdev->msi_vectors[nr]; - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); Error *local_err = NULL; vector->vdev = vdev; @@ -720,7 +731,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, clear_bit(nr, vdev->msix->pending); if (find_first_bit(vdev->msix->pending, vdev->nr_vectors) == vdev->nr_vectors) { - memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); + memory_region_set_enabled(&pdev->msix_pba_mmio, false); trace_vfio_msix_pba_disable(vdev->vbasedev.name); } @@ -771,7 +782,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev) { - msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + PCIDevice *pdev = PCI_DEVICE(vdev); + + msix_set_vector_notifiers(pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL); } @@ -798,6 +811,7 @@ void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) static void vfio_msix_enable(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); int ret; vfio_disable_interrupts(vdev); @@ -814,7 +828,7 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) */ vfio_pci_prepare_kvm_msi_virq_batch(vdev); - if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, + if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL)) { error_report("vfio: msix_set_vector_notifiers failed"); } @@ -852,11 +866,12 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) static void vfio_msi_enable(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); int ret, i; vfio_disable_interrupts(vdev); - vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); + vdev->nr_vectors = msi_nr_vectors_allocated(pdev); retry: /* * Setting vector notifiers needs to enable route for each vector. @@ -949,10 +964,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) static void vfio_msix_disable(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); Error *err = NULL; int i; - msix_unset_vector_notifiers(&vdev->pdev); + msix_unset_vector_notifiers(pdev); /* * MSI-X will only release vectors if MSI-X is still enabled on the @@ -960,8 +976,8 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) */ for (i = 0; i < vdev->nr_vectors; i++) { if (vdev->msi_vectors[i].use) { - vfio_msix_vector_release(&vdev->pdev, i); - msix_vector_unuse(&vdev->pdev, i); + vfio_msix_vector_release(pdev, i); + msix_vector_unuse(pdev, i); } } @@ -998,6 +1014,7 @@ static void vfio_msi_disable(VFIOPCIDevice *vdev) static void vfio_update_msi(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); int i; for (i = 0; i < vdev->nr_vectors; i++) { @@ -1008,8 +1025,8 @@ static void vfio_update_msi(VFIOPCIDevice *vdev) continue; } - msg = msi_get_message(&vdev->pdev, i); - vfio_update_kvm_msi_virq(vector, msg, &vdev->pdev); + msg = msi_get_message(pdev, i); + vfio_update_kvm_msi_virq(vector, msg, pdev); } } @@ -1171,13 +1188,14 @@ static const MemoryRegionOps vfio_rom_ops = { static void vfio_pci_size_rom(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); VFIODevice *vbasedev = &vdev->vbasedev; uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); char *name; - if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { + if (pdev->romfile || !pdev->rom_bar) { /* Since pci handles romfile, just print a message and return */ - if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) { + if (vfio_opt_rom_in_denylist(vdev) && pdev->romfile) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", vdev->vbasedev.name); @@ -1206,7 +1224,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) } if (vfio_opt_rom_in_denylist(vdev)) { - if (vdev->pdev.rom_bar > 0) { + if (pdev->rom_bar > 0) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", vdev->vbasedev.name); @@ -1225,12 +1243,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name); - memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev), + memory_region_init_io(&pdev->rom, OBJECT(vdev), &vfio_rom_ops, vdev, name, size); g_free(name); - pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, - PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom); + pci_register_bar(pdev, PCI_ROM_SLOT, + PCI_BASE_ADDRESS_SPACE_MEMORY, &pdev->rom); vdev->rom_read_failed = false; } @@ -1503,6 +1521,7 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(vdev); uint16_t ctrl; bool msi_64bit, msi_maskbit; int ret, entries; @@ -1523,7 +1542,7 @@ static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) trace_vfio_msi_setup(vdev->vbasedev.name, pos); - ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err); + ret = msi_init(pdev, pos, entries, msi_64bit, msi_maskbit, &err); if (ret < 0) { if (ret == -ENOTSUP) { return true; @@ -1716,6 +1735,7 @@ static bool vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp) */ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(vdev); uint8_t pos; uint16_t ctrl; uint32_t table, pba; @@ -1723,7 +1743,7 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) VFIOMSIXInfo *msix; int ret; - pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); + pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); if (!pos) { return true; } @@ -1815,12 +1835,13 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(vdev); int ret; Error *err = NULL; vdev->msix->pending = g_new0(unsigned long, BITS_TO_LONGS(vdev->msix->entries)); - ret = msix_init(&vdev->pdev, vdev->msix->entries, + ret = msix_init(pdev, vdev->msix->entries, vdev->bars[vdev->msix->table_bar].mr, vdev->msix->table_bar, vdev->msix->table_offset, vdev->bars[vdev->msix->pba_bar].mr, @@ -1852,7 +1873,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) * vector-use notifier is called, which occurs on unmask, we test whether * PBA emulation is needed and again disable if not. */ - memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false); + memory_region_set_enabled(&pdev->msix_pba_mmio, false); /* * The emulated machine may provide a paravirt interface for MSIX setup @@ -1864,7 +1885,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) */ if (object_property_get_bool(OBJECT(qdev_get_machine()), "vfio-no-msix-emulation", NULL)) { - memory_region_set_enabled(&vdev->pdev.msix_table_mmio, false); + memory_region_set_enabled(&pdev->msix_table_mmio, false); } return true; @@ -1872,10 +1893,12 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) void vfio_pci_teardown_msi(VFIOPCIDevice *vdev) { - msi_uninit(&vdev->pdev); + PCIDevice *pdev = PCI_DEVICE(vdev); + + msi_uninit(pdev); if (vdev->msix) { - msix_uninit(&vdev->pdev, + msix_uninit(pdev, vdev->bars[vdev->msix->table_bar].mr, vdev->bars[vdev->msix->pba_bar].mr); g_free(vdev->msix->pending); @@ -1936,6 +1959,7 @@ static void vfio_bars_prepare(VFIOPCIDevice *vdev) static void vfio_bar_register(VFIOPCIDevice *vdev, int nr) { + PCIDevice *pdev = PCI_DEVICE(vdev); VFIOBAR *bar = &vdev->bars[nr]; char *name; @@ -1957,7 +1981,7 @@ static void vfio_bar_register(VFIOPCIDevice *vdev, int nr) } } - pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr); + pci_register_bar(pdev, nr, bar->type, bar->mr); } static void vfio_bars_register(VFIOPCIDevice *vdev) @@ -1971,6 +1995,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev) void vfio_pci_bars_exit(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); int i; for (i = 0; i < PCI_ROM_SLOT; i++) { @@ -1984,7 +2009,7 @@ void vfio_pci_bars_exit(VFIOPCIDevice *vdev) } if (vdev->vga) { - pci_unregister_vga(&vdev->pdev); + pci_unregister_vga(pdev); vfio_vga_quirk_exit(vdev); } } @@ -2056,8 +2081,10 @@ static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask) static void vfio_add_emulated_word(VFIOPCIDevice *vdev, int pos, uint16_t val, uint16_t mask) { - vfio_set_word_bits(vdev->pdev.config + pos, val, mask); - vfio_set_word_bits(vdev->pdev.wmask + pos, ~mask, mask); + PCIDevice *pdev = PCI_DEVICE(vdev); + + vfio_set_word_bits(pdev->config + pos, val, mask); + vfio_set_word_bits(pdev->wmask + pos, ~mask, mask); vfio_set_word_bits(vdev->emulated_config_bits + pos, mask, mask); } @@ -2069,8 +2096,10 @@ static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask) static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos, uint32_t val, uint32_t mask) { - vfio_set_long_bits(vdev->pdev.config + pos, val, mask); - vfio_set_long_bits(vdev->pdev.wmask + pos, ~mask, mask); + PCIDevice *pdev = PCI_DEVICE(vdev); + + vfio_set_long_bits(pdev->config + pos, val, mask); + vfio_set_long_bits(pdev->wmask + pos, ~mask, mask); vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); } @@ -2078,7 +2107,8 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev) { struct vfio_device_info_cap_pci_atomic_comp *cap; g_autofree struct vfio_device_info *info = NULL; - PCIBus *bus = pci_get_bus(&vdev->pdev); + PCIDevice *pdev = PCI_DEVICE(vdev); + PCIBus *bus = pci_get_bus(pdev); PCIDevice *parent = bus->parent_dev; struct vfio_info_cap_header *hdr; uint32_t mask = 0; @@ -2094,8 +2124,8 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev) if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap || pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT || pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 || - vdev->pdev.devfn || - vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + pdev->devfn || + pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { return; } @@ -2139,8 +2169,10 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev) static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); + if (vdev->clear_parent_atomics_on_exit) { - PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev; + PCIDevice *parent = pci_get_bus(pdev)->parent_dev; uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2; pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 | @@ -2152,10 +2184,11 @@ static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev) static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(vdev); uint16_t flags; uint8_t type; - flags = pci_get_word(vdev->pdev.config + pos + PCI_CAP_FLAGS); + flags = pci_get_word(pdev->config + pos + PCI_CAP_FLAGS); type = (flags & PCI_EXP_FLAGS_TYPE) >> 4; if (type != PCI_EXP_TYPE_ENDPOINT && @@ -2167,8 +2200,8 @@ static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, return false; } - if (!pci_bus_is_express(pci_get_bus(&vdev->pdev))) { - PCIBus *bus = pci_get_bus(&vdev->pdev); + if (!pci_bus_is_express(pci_get_bus(pdev))) { + PCIBus *bus = pci_get_bus(pdev); PCIDevice *bridge; /* @@ -2200,7 +2233,7 @@ static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, return true; } - } else if (pci_bus_is_root(pci_get_bus(&vdev->pdev))) { + } else if (pci_bus_is_root(pci_get_bus(pdev))) { /* * On a Root Complex bus Endpoints become Root Complex Integrated * Endpoints, which changes the type and clears the LNK & LNK2 fields. @@ -2268,20 +2301,20 @@ static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, 1, PCI_EXP_FLAGS_VERS); } - pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size, - errp); + pos = pci_add_capability(pdev, PCI_CAP_ID_EXP, pos, size, errp); if (pos < 0) { return false; } - vdev->pdev.exp.exp_cap = pos; + pdev->exp.exp_cap = pos; return true; } static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos) { - uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP); + PCIDevice *pdev = PCI_DEVICE(vdev); + uint32_t cap = pci_get_long(pdev->config + pos + PCI_EXP_DEVCAP); if (cap & PCI_EXP_DEVCAP_FLR) { trace_vfio_check_pcie_flr(vdev->vbasedev.name); @@ -2291,7 +2324,8 @@ static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos) static void vfio_check_pm_reset(VFIOPCIDevice *vdev, uint8_t pos) { - uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL); + PCIDevice *pdev = PCI_DEVICE(vdev); + uint16_t csr = pci_get_word(pdev->config + pos + PCI_PM_CTRL); if (!(csr & PCI_PM_CTRL_NO_SOFT_RESET)) { trace_vfio_check_pm_reset(vdev->vbasedev.name); @@ -2301,7 +2335,8 @@ static void vfio_check_pm_reset(VFIOPCIDevice *vdev, uint8_t pos) static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) { - uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP); + PCIDevice *pdev = PCI_DEVICE(vdev); + uint8_t cap = pci_get_byte(pdev->config + pos + PCI_AF_CAP); if ((cap & PCI_AF_CAP_TP) && (cap & PCI_AF_CAP_FLR)) { trace_vfio_check_af_flr(vdev->vbasedev.name); @@ -2312,7 +2347,7 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) static bool vfio_add_vendor_specific_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, Error **errp) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); pos = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, size, errp); if (pos < 0) { @@ -2334,7 +2369,7 @@ static bool vfio_add_vendor_specific_cap(VFIOPCIDevice *vdev, int pos, static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) { ERRP_GUARD(); - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); uint8_t cap_id, next, size; bool ret; @@ -2420,17 +2455,18 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) { + PCIDevice *pdev = PCI_DEVICE(vdev); uint32_t ctrl; int i, nbar; - ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); + ctrl = pci_get_long(pdev->config + pos + PCI_REBAR_CTRL); nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; for (i = 0; i < nbar; i++) { uint32_t cap; int size; - ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); + ctrl = pci_get_long(pdev->config + pos + PCI_REBAR_CTRL + (i * 8)); size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ @@ -2468,7 +2504,7 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) static void vfio_add_ext_cap(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); uint32_t header; uint16_t cap_id, next, size; uint8_t cap_ver; @@ -2562,7 +2598,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) || !pdev->config[PCI_CAPABILITY_LIST]) { @@ -2579,7 +2615,7 @@ bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp) void vfio_pci_pre_reset(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); uint16_t cmd; vfio_disable_interrupts(vdev); @@ -2775,8 +2811,8 @@ static const VMStateDescription vmstate_vfio_pci_config = { .version_id = 1, .minimum_version_id = 1, .fields = (const VMStateField[]) { - VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), - VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present), + VMSTATE_PCI_DEVICE(parent_obj, VFIOPCIDevice), + VMSTATE_MSIX_TEST(parent_obj, VFIOPCIDevice, vfio_msix_present), VMSTATE_END_OF_LIST() }, .subsections = (const VMStateDescription * const []) { @@ -2796,7 +2832,7 @@ static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp) static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); pcibus_t old_addr[PCI_NUM_REGIONS - 1]; int bar, ret; @@ -2833,9 +2869,18 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) return ret; } +/* Transform from VFIODevice to VFIOPCIDevice. Return NULL if fails. */ +VFIOPCIDevice *vfio_pci_from_vfio_device(VFIODevice *vbasedev) +{ + if (vbasedev && vbasedev->type == VFIO_DEVICE_TYPE_PCI) { + return container_of(vbasedev, VFIOPCIDevice, vbasedev); + } + return NULL; +} + void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); int page_size = qemu_real_host_page_size(); int bar; @@ -2919,6 +2964,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) { + PCIDevice *pdev = PCI_DEVICE(vdev); VFIODevice *vbasedev = &vdev->vbasedev; struct vfio_region_info *reg_info = NULL; struct vfio_irq_info irq_info; @@ -2970,7 +3016,7 @@ bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) vdev->config_size = reg_info->size; if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) { - vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS; + pdev->cap_present &= ~QEMU_PCI_CAP_EXPRESS; } vdev->config_offset = reg_info->offset; @@ -3174,25 +3220,26 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) void vfio_pci_config_register_vga(VFIOPCIDevice *vdev) { + PCIDevice *pdev = PCI_DEVICE(vdev); assert(vdev->vga != NULL); - pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, + pci_register_vga(pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem, &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem, &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem); } bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); VFIODevice *vbasedev = &vdev->vbasedev; uint32_t config_space_size; int ret; - config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + config_space_size = MIN(pci_config_size(pdev), vdev->config_size); /* Get a copy of config space */ ret = vfio_pci_config_space_read(vdev, 0, config_space_size, - vdev->pdev.config); + pdev->config); if (ret < (int)config_space_size) { ret = ret < 0 ? -ret : EFAULT; error_setg_errno(errp, ret, "failed to read device config space"); @@ -3277,10 +3324,10 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) PCI_HEADER_TYPE_MULTI_FUNCTION; /* Restore or clear multifunction, this is always controlled by QEMU */ - if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { - vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; + if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + pdev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; } else { - vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; + pdev->config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; } /* @@ -3288,8 +3335,8 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) * BAR, such as might be the case with the option ROM, we can get * confusing, unwritable, residual addresses from the host here. */ - memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); - memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); + memset(&pdev->config[PCI_BASE_ADDRESS_0], 0, 24); + memset(&pdev->config[PCI_ROM_ADDRESS], 0, 4); vfio_pci_size_rom(vdev); @@ -3310,7 +3357,7 @@ bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) { - PCIDevice *pdev = &vdev->pdev; + PCIDevice *pdev = PCI_DEVICE(vdev); /* QEMU emulates all of MSI & MSIX */ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { @@ -3323,10 +3370,10 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) vdev->msi_cap_size); } - if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { + if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) { vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, vfio_intx_mmap_enable, vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, + pci_device_set_intx_routing_notifier(pdev, vfio_intx_routing_notifier); vdev->irqchip_change_notifier.notify = vfio_irqchip_change; kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); @@ -3338,7 +3385,7 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) */ if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) { timer_free(vdev->intx.mmap_timer); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + pci_device_set_intx_routing_notifier(pdev, NULL); kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); return false; } @@ -3489,7 +3536,7 @@ out_deregister: if (vdev->interrupt == VFIO_INT_INTx) { vfio_intx_disable(vdev); } - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + pci_device_set_intx_routing_notifier(pdev, NULL); if (vdev->irqchip_change_notifier.notify) { kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); } @@ -3521,7 +3568,7 @@ static void vfio_exitfn(PCIDevice *pdev) vfio_unregister_req_notifier(vdev); vfio_unregister_err_notifier(vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + pci_device_set_intx_routing_notifier(pdev, NULL); if (vdev->irqchip_change_notifier.notify) { kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); } diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 810a842f4a..e0aef82a89 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -123,7 +123,8 @@ typedef struct VFIOMSIXInfo { OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE) struct VFIOPCIDevice { - PCIDevice pdev; + PCIDevice parent_obj; + VFIODevice vbasedev; VFIOINTx intx; unsigned int config_size; @@ -203,6 +204,11 @@ static inline bool vfio_is_vga(VFIOPCIDevice *vdev) return (vdev->class_code >> 8) == PCI_CLASS_DISPLAY_VGA; } +static inline bool vfio_is_base_display(VFIOPCIDevice *vdev) +{ + return (vdev->class_code >> 16) == PCI_BASE_CLASS_DISPLAY; +} + /* MSI/MSI-X/INTx */ void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr); void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, @@ -221,6 +227,18 @@ void vfio_pci_write_config(PCIDevice *pdev, uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); +/** + * vfio_pci_from_vfio_device: Transform from VFIODevice to + * VFIOPCIDevice + * + * This function checks if the given @vbasedev is a VFIO PCI device. + * If it is, it returns the containing VFIOPCIDevice. + * + * @vbasedev: The VFIODevice to transform + * + * Return: The VFIOPCIDevice on success, NULL on failure. + */ +VFIOPCIDevice *vfio_pci_from_vfio_device(VFIODevice *vbasedev); void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev); bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev); bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c deleted file mode 100644 index 5c1795a26f..0000000000 --- a/hw/vfio/platform.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * vfio based device assignment support - platform devices - * - * Copyright Linaro Limited, 2014 - * - * Authors: - * Kim Phillips <kim.phillips@linaro.org> - * Eric Auger <eric.auger@linaro.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Based on vfio based PCI device assignment support: - * Copyright Red Hat, Inc. 2012 - */ - -#include "qemu/osdep.h" -#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ -#include "qapi/error.h" -#include <sys/ioctl.h> -#include <linux/vfio.h> - -#include "hw/vfio/vfio-platform.h" -#include "system/iommufd.h" -#include "migration/vmstate.h" -#include "qemu/error-report.h" -#include "qemu/lockable.h" -#include "qemu/main-loop.h" -#include "qemu/module.h" -#include "qemu/range.h" -#include "system/memory.h" -#include "system/address-spaces.h" -#include "qemu/queue.h" -#include "hw/sysbus.h" -#include "trace.h" -#include "hw/irq.h" -#include "hw/platform-bus.h" -#include "hw/qdev-properties.h" -#include "system/kvm.h" -#include "hw/vfio/vfio-region.h" - -/* - * Functions used whatever the injection method - */ - -static inline bool vfio_irq_is_automasked(VFIOINTp *intp) -{ - return intp->flags & VFIO_IRQ_INFO_AUTOMASKED; -} - -/** - * vfio_init_intp - allocate, initialize the IRQ struct pointer - * and add it into the list of IRQs - * @vbasedev: the VFIO device handle - * @info: irq info struct retrieved from VFIO driver - * @errp: error object - */ -static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, - struct vfio_irq_info info, Error **errp) -{ - int ret; - VFIOPlatformDevice *vdev = - container_of(vbasedev, VFIOPlatformDevice, vbasedev); - SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev); - VFIOINTp *intp; - - intp = g_malloc0(sizeof(*intp)); - intp->vdev = vdev; - intp->pin = info.index; - intp->flags = info.flags; - intp->state = VFIO_IRQ_INACTIVE; - intp->kvm_accel = false; - - sysbus_init_irq(sbdev, &intp->qemuirq); - - /* Get an eventfd for trigger */ - intp->interrupt = g_new0(EventNotifier, 1); - ret = event_notifier_init(intp->interrupt, 0); - if (ret) { - g_free(intp->interrupt); - g_free(intp); - error_setg_errno(errp, -ret, - "failed to initialize trigger eventfd notifier"); - return NULL; - } - if (vfio_irq_is_automasked(intp)) { - /* Get an eventfd for resample/unmask */ - intp->unmask = g_new0(EventNotifier, 1); - ret = event_notifier_init(intp->unmask, 0); - if (ret) { - g_free(intp->interrupt); - g_free(intp->unmask); - g_free(intp); - error_setg_errno(errp, -ret, - "failed to initialize resample eventfd notifier"); - return NULL; - } - } - - QLIST_INSERT_HEAD(&vdev->intp_list, intp, next); - return intp; -} - -/** - * vfio_set_trigger_eventfd - set VFIO eventfd handling - * - * @intp: IRQ struct handle - * @handler: handler to be called on eventfd signaling - * - * Setup VFIO signaling and attach an optional user-side handler - * to the eventfd - */ -static int vfio_set_trigger_eventfd(VFIOINTp *intp, - eventfd_user_side_handler_t handler) -{ - VFIODevice *vbasedev = &intp->vdev->vbasedev; - int32_t fd = event_notifier_get_fd(intp->interrupt); - Error *err = NULL; - - qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp); - - if (!vfio_device_irq_set_signaling(vbasedev, intp->pin, 0, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); - qemu_set_fd_handler(fd, NULL, NULL, NULL); - return -EINVAL; - } - - return 0; -} - -/* - * Functions only used when eventfds are handled on user-side - * ie. without irqfd - */ - -/** - * vfio_mmap_set_enabled - enable/disable the fast path mode - * @vdev: the VFIO platform device - * @enabled: the target mmap state - * - * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP); - * enabled = false ~ slow path = MMIO region is trapped and region callbacks - * are called; slow path enables to trap the device IRQ status register reset -*/ - -static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) -{ - int i; - - for (i = 0; i < vdev->vbasedev.num_regions; i++) { - vfio_region_mmaps_set_enabled(vdev->regions[i], enabled); - } -} - -/** - * vfio_intp_mmap_enable - timer function, restores the fast path - * if there is no more active IRQ - * @opaque: actually points to the VFIO platform device - * - * Called on mmap timer timeout, this function checks whether the - * IRQ is still active and if not, restores the fast path. - * by construction a single eventfd is handled at a time. - * if the IRQ is still active, the timer is re-programmed. - */ -static void vfio_intp_mmap_enable(void *opaque) -{ - VFIOINTp *tmp; - VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque; - - QEMU_LOCK_GUARD(&vdev->intp_mutex); - QLIST_FOREACH(tmp, &vdev->intp_list, next) { - if (tmp->state == VFIO_IRQ_ACTIVE) { - trace_vfio_platform_intp_mmap_enable(tmp->pin); - /* re-program the timer to check active status later */ - timer_mod(vdev->mmap_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + - vdev->mmap_timeout); - return; - } - } - vfio_mmap_set_enabled(vdev, true); -} - -/** - * vfio_intp_inject_pending_lockheld - Injects a pending IRQ - * @opaque: opaque pointer, in practice the VFIOINTp handle - * - * The function is called on a previous IRQ completion, from - * vfio_platform_eoi, while the intp_mutex is locked. - * Also in such situation, the slow path already is set and - * the mmap timer was already programmed. - */ -static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp) -{ - trace_vfio_platform_intp_inject_pending_lockheld(intp->pin, - event_notifier_get_fd(intp->interrupt)); - - intp->state = VFIO_IRQ_ACTIVE; - - /* trigger the virtual IRQ */ - qemu_set_irq(intp->qemuirq, 1); -} - -/** - * vfio_intp_interrupt - The user-side eventfd handler - * @opaque: opaque pointer which in practice is the VFIOINTp handle - * - * the function is entered in event handler context: - * the vIRQ is injected into the guest if there is no other active - * or pending IRQ. - */ -static void vfio_intp_interrupt(VFIOINTp *intp) -{ - int ret; - VFIOINTp *tmp; - VFIOPlatformDevice *vdev = intp->vdev; - bool delay_handling = false; - - QEMU_LOCK_GUARD(&vdev->intp_mutex); - if (intp->state == VFIO_IRQ_INACTIVE) { - QLIST_FOREACH(tmp, &vdev->intp_list, next) { - if (tmp->state == VFIO_IRQ_ACTIVE || - tmp->state == VFIO_IRQ_PENDING) { - delay_handling = true; - break; - } - } - } - if (delay_handling) { - /* - * the new IRQ gets a pending status and is pushed in - * the pending queue - */ - intp->state = VFIO_IRQ_PENDING; - trace_vfio_intp_interrupt_set_pending(intp->pin); - QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue, - intp, pqnext); - event_notifier_test_and_clear(intp->interrupt); - return; - } - - trace_vfio_platform_intp_interrupt(intp->pin, - event_notifier_get_fd(intp->interrupt)); - - ret = event_notifier_test_and_clear(intp->interrupt); - if (!ret) { - error_report("Error when clearing fd=%d (ret = %d)", - event_notifier_get_fd(intp->interrupt), ret); - } - - intp->state = VFIO_IRQ_ACTIVE; - - /* sets slow path */ - vfio_mmap_set_enabled(vdev, false); - - /* trigger the virtual IRQ */ - qemu_set_irq(intp->qemuirq, 1); - - /* - * Schedule the mmap timer which will restore fastpath when no IRQ - * is active anymore - */ - if (vdev->mmap_timeout) { - timer_mod(vdev->mmap_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + - vdev->mmap_timeout); - } -} - -/** - * vfio_platform_eoi - IRQ completion routine - * @vbasedev: the VFIO device handle - * - * De-asserts the active virtual IRQ and unmasks the physical IRQ - * (effective for level sensitive IRQ auto-masked by the VFIO driver). - * Then it handles next pending IRQ if any. - * eoi function is called on the first access to any MMIO region - * after an IRQ was triggered, trapped since slow path was set. - * It is assumed this access corresponds to the IRQ status - * register reset. With such a mechanism, a single IRQ can be - * handled at a time since there is no way to know which IRQ - * was completed by the guest (we would need additional details - * about the IRQ status register mask). - */ -static void vfio_platform_eoi(VFIODevice *vbasedev) -{ - VFIOINTp *intp; - VFIOPlatformDevice *vdev = - container_of(vbasedev, VFIOPlatformDevice, vbasedev); - - QEMU_LOCK_GUARD(&vdev->intp_mutex); - QLIST_FOREACH(intp, &vdev->intp_list, next) { - if (intp->state == VFIO_IRQ_ACTIVE) { - trace_vfio_platform_eoi(intp->pin, - event_notifier_get_fd(intp->interrupt)); - intp->state = VFIO_IRQ_INACTIVE; - - /* deassert the virtual IRQ */ - qemu_set_irq(intp->qemuirq, 0); - - if (vfio_irq_is_automasked(intp)) { - /* unmasks the physical level-sensitive IRQ */ - vfio_device_irq_unmask(vbasedev, intp->pin); - } - - /* a single IRQ can be active at a time */ - break; - } - } - /* in case there are pending IRQs, handle the first one */ - if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) { - intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue); - vfio_intp_inject_pending_lockheld(intp); - QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext); - } -} - -/** - * vfio_start_eventfd_injection - starts the virtual IRQ injection using - * user-side handled eventfds - * @sbdev: the sysbus device handle - * @irq: the qemu irq handle - */ - -static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq) -{ - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); - VFIOINTp *intp; - - QLIST_FOREACH(intp, &vdev->intp_list, next) { - if (intp->qemuirq == irq) { - break; - } - } - assert(intp); - - if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) { - abort(); - } -} - -/* - * Functions used for irqfd - */ - -/** - * vfio_set_resample_eventfd - sets the resamplefd for an IRQ - * @intp: the IRQ struct handle - * programs the VFIO driver to unmask this IRQ when the - * intp->unmask eventfd is triggered - */ -static int vfio_set_resample_eventfd(VFIOINTp *intp) -{ - int32_t fd = event_notifier_get_fd(intp->unmask); - VFIODevice *vbasedev = &intp->vdev->vbasedev; - Error *err = NULL; - - qemu_set_fd_handler(fd, NULL, NULL, NULL); - if (!vfio_device_irq_set_signaling(vbasedev, intp->pin, 0, - VFIO_IRQ_SET_ACTION_UNMASK, fd, &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); - return -EINVAL; - } - return 0; -} - -/** - * vfio_start_irqfd_injection - starts the virtual IRQ injection using - * irqfd - * - * @sbdev: the sysbus device handle - * @irq: the qemu irq handle - * - * In case the irqfd setup fails, we fallback to userspace handled eventfd - */ -static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq) -{ - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev); - VFIOINTp *intp; - - if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() || - !vdev->irqfd_allowed) { - goto fail_irqfd; - } - - QLIST_FOREACH(intp, &vdev->intp_list, next) { - if (intp->qemuirq == irq) { - break; - } - } - assert(intp); - - if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt, - intp->unmask, irq) < 0) { - goto fail_irqfd; - } - - if (vfio_set_trigger_eventfd(intp, NULL) < 0) { - goto fail_vfio; - } - if (vfio_irq_is_automasked(intp)) { - if (vfio_set_resample_eventfd(intp) < 0) { - goto fail_vfio; - } - trace_vfio_platform_start_level_irqfd_injection(intp->pin, - event_notifier_get_fd(intp->interrupt), - event_notifier_get_fd(intp->unmask)); - } else { - trace_vfio_platform_start_edge_irqfd_injection(intp->pin, - event_notifier_get_fd(intp->interrupt)); - } - - intp->kvm_accel = true; - - return; -fail_vfio: - kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq); - abort(); -fail_irqfd: - vfio_start_eventfd_injection(sbdev, irq); -} - -/* VFIO skeleton */ - -static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev) -{ - vbasedev->needs_reset = true; -} - -/* not implemented yet */ -static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev) -{ - return -1; -} - -/** - * vfio_populate_device - Allocate and populate MMIO region - * and IRQ structs according to driver returned information - * @vbasedev: the VFIO device handle - * @errp: error object - * - */ -static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp) -{ - VFIOINTp *intp, *tmp; - int i, ret = -1; - VFIOPlatformDevice *vdev = - container_of(vbasedev, VFIOPlatformDevice, vbasedev); - - if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) { - error_setg(errp, "this isn't a platform device"); - return false; - } - - vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions); - - for (i = 0; i < vbasedev->num_regions; i++) { - char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i); - - vdev->regions[i] = g_new0(VFIORegion, 1); - ret = vfio_region_setup(OBJECT(vdev), vbasedev, - vdev->regions[i], i, name); - g_free(name); - if (ret) { - error_setg_errno(errp, -ret, "failed to get region %d info", i); - goto reg_error; - } - } - - vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, - vfio_intp_mmap_enable, vdev); - - QSIMPLEQ_INIT(&vdev->pending_intp_queue); - - for (i = 0; i < vbasedev->num_irqs; i++) { - struct vfio_irq_info irq; - - ret = vfio_device_get_irq_info(vbasedev, i, &irq); - - if (ret) { - error_setg_errno(errp, -ret, "failed to get device irq info"); - goto irq_err; - } else { - trace_vfio_platform_populate_interrupts(irq.index, - irq.count, - irq.flags); - intp = vfio_init_intp(vbasedev, irq, errp); - if (!intp) { - goto irq_err; - } - } - } - return true; -irq_err: - timer_del(vdev->mmap_timer); - QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) { - QLIST_REMOVE(intp, next); - g_free(intp); - } -reg_error: - for (i = 0; i < vbasedev->num_regions; i++) { - if (vdev->regions[i]) { - vfio_region_finalize(vdev->regions[i]); - } - g_free(vdev->regions[i]); - } - g_free(vdev->regions); - return false; -} - -/* specialized functions for VFIO Platform devices */ -static VFIODeviceOps vfio_platform_ops = { - .vfio_compute_needs_reset = vfio_platform_compute_needs_reset, - .vfio_hot_reset_multi = vfio_platform_hot_reset_multi, - .vfio_eoi = vfio_platform_eoi, -}; - -/** - * vfio_base_device_init - perform preliminary VFIO setup - * @vbasedev: the VFIO device handle - * @errp: error object - * - * Implement the VFIO command sequence that allows to discover - * assigned device resources: group extraction, device - * fd retrieval, resource query. - * Precondition: the device name must be initialized - */ -static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp) -{ - /* @fd takes precedence over @sysfsdev which takes precedence over @host */ - if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - vfio_device_free_name(vbasedev); - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); - } else if (vbasedev->fd < 0) { - if (!vbasedev->name || strchr(vbasedev->name, '/')) { - error_setg(errp, "wrong host device name"); - return false; - } - - vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s", - vbasedev->name); - } - - if (!vfio_device_get_name(vbasedev, errp)) { - return false; - } - - if (!vfio_device_attach(vbasedev->name, vbasedev, - &address_space_memory, errp)) { - return false; - } - - if (vfio_populate_device(vbasedev, errp)) { - return true; - } - - vfio_device_detach(vbasedev); - return false; -} - -/** - * vfio_platform_realize - the device realize function - * @dev: device state pointer - * @errp: error - * - * initialize the device, its memory regions and IRQ structures - * IRQ are started separately - */ -static void vfio_platform_realize(DeviceState *dev, Error **errp) -{ - ERRP_GUARD(); - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); - SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); - VFIODevice *vbasedev = &vdev->vbasedev; - int i; - - warn_report("-device vfio-platform is deprecated"); - qemu_mutex_init(&vdev->intp_mutex); - - trace_vfio_platform_realize(vbasedev->sysfsdev ? - vbasedev->sysfsdev : vbasedev->name, - vdev->compat); - - if (!vfio_base_device_init(vbasedev, errp)) { - goto init_err; - } - - if (!vdev->compat) { - GError *gerr = NULL; - gchar *contents; - gsize length; - char *path; - - path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev); - if (!g_file_get_contents(path, &contents, &length, &gerr)) { - error_setg(errp, "%s", gerr->message); - g_error_free(gerr); - g_free(path); - return; - } - g_free(path); - vdev->compat = contents; - for (vdev->num_compat = 0; length; vdev->num_compat++) { - size_t skip = strlen(contents) + 1; - contents += skip; - length -= skip; - } - } - - for (i = 0; i < vbasedev->num_regions; i++) { - if (vfio_region_mmap(vdev->regions[i])) { - warn_report("%s mmap unsupported, performance may be slow", - memory_region_name(vdev->regions[i]->mem)); - } - sysbus_init_mmio(sbdev, vdev->regions[i]->mem); - } - return; - -init_err: - if (vdev->vbasedev.name) { - error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); - } else { - error_prepend(errp, "vfio error: "); - } -} - -static const VMStateDescription vfio_platform_vmstate = { - .name = "vfio-platform", - .unmigratable = 1, -}; - -static const Property vfio_platform_dev_properties[] = { - DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name), - DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev), - DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false), - DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, - mmap_timeout, 1100), - DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), -#ifdef CONFIG_IOMMUFD - DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd, - TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), -#endif -}; - -static void vfio_platform_instance_init(Object *obj) -{ - VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); - VFIODevice *vbasedev = &vdev->vbasedev; - - vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops, - DEVICE(vdev), false); -} - -#ifdef CONFIG_IOMMUFD -static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp) -{ - vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp); -} -#endif - -static void vfio_platform_class_init(ObjectClass *klass, const void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); - - dc->realize = vfio_platform_realize; - device_class_set_props(dc, vfio_platform_dev_properties); -#ifdef CONFIG_IOMMUFD - object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd); -#endif - dc->vmsd = &vfio_platform_vmstate; - dc->desc = "VFIO-based platform device assignment"; - sbc->connect_irq_notifier = vfio_start_irqfd_injection; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); - - object_class_property_set_description(klass, /* 2.4 */ - "host", - "Host device name of assigned device"); - object_class_property_set_description(klass, /* 2.4 and 2.5 */ - "x-no-mmap", - "Disable MMAP for device. Allows to trace MMIO " - "accesses (DEBUG)"); - object_class_property_set_description(klass, /* 2.4 */ - "mmap-timeout-ms", - "When EOI is not provided by KVM/QEMU, wait time " - "(milliseconds) to re-enable device direct access " - "after level interrupt (DEBUG)"); - object_class_property_set_description(klass, /* 2.4 */ - "x-irqfd", - "Allow disabling irqfd support (DEBUG)"); - object_class_property_set_description(klass, /* 2.6 */ - "sysfsdev", - "Host sysfs path of assigned device"); -#ifdef CONFIG_IOMMUFD - object_class_property_set_description(klass, /* 9.0 */ - "iommufd", - "Set host IOMMUFD backend device"); -#endif -} - -static const TypeInfo vfio_platform_dev_info = { - .name = TYPE_VFIO_PLATFORM, - .parent = TYPE_DYNAMIC_SYS_BUS_DEVICE, - .instance_size = sizeof(VFIOPlatformDevice), - .instance_init = vfio_platform_instance_init, - .class_init = vfio_platform_class_init, - .class_size = sizeof(VFIOPlatformDeviceClass), -}; - -static void register_vfio_platform_dev_type(void) -{ - type_register_static(&vfio_platform_dev_info); -} - -type_init(register_vfio_platform_dev_type) diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 564b70ef97..c41e4588d6 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -62,7 +62,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, prereg_listener); VFIOContainer *container = &scontainer->container; - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); const hwaddr gpa = section->offset_within_address_space; hwaddr end; int ret; @@ -244,7 +244,7 @@ static bool vfio_spapr_create_window(VFIOContainer *container, hwaddr *pgsize, Error **errp) { int ret = 0; - VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOContainerBase *bcontainer = VFIO_IOMMU(container); VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, container); IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); @@ -352,8 +352,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section, Error **errp) { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, container); VFIOHostDMAWindow *hostwin; @@ -443,8 +442,7 @@ static void vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, container); @@ -465,8 +463,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, container); VFIOHostDMAWindow *hostwin, *next; @@ -484,8 +481,7 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) static bool vfio_spapr_container_setup(VFIOContainerBase *bcontainer, Error **errp) { - VFIOContainer *container = container_of(bcontainer, VFIOContainer, - bcontainer); + VFIOContainer *container = VFIO_IOMMU_LEGACY(bcontainer); VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, container); struct vfio_iommu_spapr_tce_info info; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index fc6ed230d0..e3d571f8c8 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -127,17 +127,6 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" -# platform.c -vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" -vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" -vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path" -vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)" -vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)" -vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x" -vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING" -vfio_platform_start_level_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d" -vfio_platform_start_edge_irqfd_injection(int index, int fd) "IRQ index=%d, fd = %d" - # spapr.c vfio_prereg_listener_region_add_skip(uint64_t start, uint64_t end) "0x%"PRIx64" - 0x%"PRIx64 vfio_prereg_listener_region_del_skip(uint64_t start, uint64_t end) "0x%"PRIx64" - 0x%"PRIx64 diff --git a/hw/vfio/vfio-region.h b/hw/vfio/vfio-region.h new file mode 100644 index 0000000000..ede6e0c8f9 --- /dev/null +++ b/hw/vfio/vfio-region.h @@ -0,0 +1,48 @@ +/* + * VFIO region + * + * Copyright Red Hat, Inc. 2025 + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_REGION_H +#define HW_VFIO_REGION_H + +#include "system/memory.h" + +typedef struct VFIOMmap { + MemoryRegion mem; + void *mmap; + off_t offset; + size_t size; +} VFIOMmap; + +typedef struct VFIODevice VFIODevice; + +typedef struct VFIORegion { + struct VFIODevice *vbasedev; + off_t fd_offset; /* offset of region within device fd */ + MemoryRegion *mem; /* slow, read/write access */ + size_t size; + uint32_t flags; /* VFIO region flags (rd/wr/mmap) */ + uint32_t nr_mmaps; + VFIOMmap *mmaps; + uint8_t nr; /* cache the region number for debug */ + bool post_wr; /* writes can be posted */ +} VFIORegion; + + +void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size); +uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size); +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); +int vfio_region_mmap(VFIORegion *region); +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); +void vfio_region_unmap(VFIORegion *region); +void vfio_region_exit(VFIORegion *region); +void vfio_region_finalize(VFIORegion *region); + +#endif /* HW_VFIO_REGION_H */ |