diff options
Diffstat (limited to 'hw')
| -rw-r--r-- | hw/alpha/typhoon.c | 6 | ||||
| -rw-r--r-- | hw/arm/smmu-common.c | 6 | ||||
| -rw-r--r-- | hw/core/qdev-properties-system.c | 11 | ||||
| -rw-r--r-- | hw/hyperv/vmbus.c | 4 | ||||
| -rw-r--r-- | hw/i386/amd_iommu.c | 6 | ||||
| -rw-r--r-- | hw/i386/intel_iommu.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/astro.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/designware.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/dino.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/pnv_phb3.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/pnv_phb4.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/ppce500.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/raven.c | 6 | ||||
| -rw-r--r-- | hw/pci-host/sabre.c | 6 | ||||
| -rw-r--r-- | hw/pci/pci.c | 18 | ||||
| -rw-r--r-- | hw/ppc/ppc440_pcix.c | 6 | ||||
| -rw-r--r-- | hw/ppc/spapr_pci.c | 6 | ||||
| -rw-r--r-- | hw/ppc/spapr_pci_vfio.c | 100 | ||||
| -rw-r--r-- | hw/remote/iommu.c | 6 | ||||
| -rw-r--r-- | hw/s390x/s390-pci-bus.c | 8 | ||||
| -rw-r--r-- | hw/vfio/ap.c | 1 | ||||
| -rw-r--r-- | hw/vfio/ccw.c | 1 | ||||
| -rw-r--r-- | hw/vfio/common.c | 80 | ||||
| -rw-r--r-- | hw/vfio/container.c | 328 | ||||
| -rw-r--r-- | hw/vfio/helpers.c | 1 | ||||
| -rw-r--r-- | hw/vfio/pci.c | 2 | ||||
| -rw-r--r-- | hw/vfio/spapr.c | 282 | ||||
| -rw-r--r-- | hw/virtio/trace-events | 1 | ||||
| -rw-r--r-- | hw/virtio/virtio-iommu-pci.c | 8 | ||||
| -rw-r--r-- | hw/virtio/virtio-iommu.c | 161 |
30 files changed, 683 insertions, 413 deletions
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 49a80550c5..e8711ae16a 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -738,6 +738,10 @@ static AddressSpace *typhoon_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &s->pchip.iommu_as; } +static const PCIIOMMUOps typhoon_iommu_ops = { + .get_address_space = typhoon_pci_dma_iommu, +}; + static void typhoon_set_irq(void *opaque, int irq, int level) { TyphoonState *s = opaque; @@ -897,7 +901,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, qemu_irq *p_isa_irq, "iommu-typhoon", UINT64_MAX); address_space_init(&s->pchip.iommu_as, MEMORY_REGION(&s->pchip.iommu), "pchip0-pci"); - pci_setup_iommu(b, typhoon_pci_dma_iommu, s); + pci_setup_iommu(b, &typhoon_iommu_ops, s); /* Pchip0 PCI special/interrupt acknowledge, 0x801.F800.0000, 64MB. */ memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), &alpha_pci_iack_ops, diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index f35ae9aa22..9a8ac45431 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -605,6 +605,10 @@ static AddressSpace *smmu_find_add_as(PCIBus *bus, void *opaque, int devfn) return &sdev->as; } +static const PCIIOMMUOps smmu_ops = { + .get_address_space = smmu_find_add_as, +}; + IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) { uint8_t bus_n, devfn; @@ -661,7 +665,7 @@ static void smmu_base_realize(DeviceState *dev, Error **errp) s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL); if (s->primary_bus) { - pci_setup_iommu(s->primary_bus, smmu_find_add_as, s); + pci_setup_iommu(s->primary_bus, &smmu_ops, s); } else { error_setg(errp, "SMMU is not attached to any PCI bus!"); } diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 2f1dbb3fd7..b46d16cd2c 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -705,7 +705,7 @@ static void get_reserved_region(Object *obj, Visitor *v, const char *name, int rc; rc = snprintf(buffer, sizeof(buffer), "0x%"PRIx64":0x%"PRIx64":%u", - rr->low, rr->high, rr->type); + range_lob(&rr->range), range_upb(&rr->range), rr->type); assert(rc < sizeof(buffer)); visit_type_str(v, name, &p, errp); @@ -717,6 +717,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, Property *prop = opaque; ReservedRegion *rr = object_field_prop_ptr(obj, prop); const char *endptr; + uint64_t lob, upb; char *str; int ret; @@ -724,7 +725,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, return; } - ret = qemu_strtou64(str, &endptr, 16, &rr->low); + ret = qemu_strtou64(str, &endptr, 16, &lob); if (ret) { error_setg(errp, "start address of '%s'" " must be a hexadecimal integer", name); @@ -734,7 +735,7 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, goto separator_error; } - ret = qemu_strtou64(endptr + 1, &endptr, 16, &rr->high); + ret = qemu_strtou64(endptr + 1, &endptr, 16, &upb); if (ret) { error_setg(errp, "end address of '%s'" " must be a hexadecimal integer", name); @@ -744,6 +745,8 @@ static void set_reserved_region(Object *obj, Visitor *v, const char *name, goto separator_error; } + range_set_bounds(&rr->range, lob, upb); + ret = qemu_strtoui(endptr + 1, &endptr, 10, &rr->type); if (ret) { error_setg(errp, "type of '%s'" @@ -1111,7 +1114,7 @@ static void get_uuid(Object *obj, Visitor *v, const char *name, void *opaque, { Property *prop = opaque; QemuUUID *uuid = object_field_prop_ptr(obj, prop); - char buffer[UUID_FMT_LEN + 1]; + char buffer[UUID_STR_LEN]; char *p = buffer; qemu_uuid_unparse(uuid, buffer); diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index 271289f902..c64eaa5a46 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -2271,7 +2271,7 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp) VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev)); BusChild *child; Error *err = NULL; - char idstr[UUID_FMT_LEN + 1]; + char idstr[UUID_STR_LEN]; assert(!qemu_uuid_is_null(&vdev->instanceid)); @@ -2467,7 +2467,7 @@ static char *vmbus_get_dev_path(DeviceState *dev) static char *vmbus_get_fw_dev_path(DeviceState *dev) { VMBusDevice *vdev = VMBUS_DEVICE(dev); - char uuid[UUID_FMT_LEN + 1]; + char uuid[UUID_STR_LEN]; qemu_uuid_unparse(&vdev->instanceid, uuid); return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 7965415b47..4203144da9 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1450,6 +1450,10 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &iommu_as[devfn]->as; } +static const PCIIOMMUOps amdvi_iommu_ops = { + .get_address_space = amdvi_host_dma_iommu, +}; + static const MemoryRegionOps mmio_mem_ops = { .read = amdvi_mmio_read, .write = amdvi_mmio_write, @@ -1581,7 +1585,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) AMDVI_MMIO_SIZE); memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR, &s->mmio); - pci_setup_iommu(bus, amdvi_host_dma_iommu, s); + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 1c6c18622f..a6f1a7aa52 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -4088,6 +4088,10 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &vtd_as->as; } +static PCIIOMMUOps vtd_iommu_ops = { + .get_address_space = vtd_host_dma_iommu, +}; + static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); @@ -4210,7 +4214,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); vtd_init(s); - pci_setup_iommu(bus, vtd_host_dma_iommu, dev); + pci_setup_iommu(bus, &vtd_iommu_ops, dev); /* Pseudo address space under root PCI bus. */ x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); qemu_add_machine_init_done_notifier(&vtd_machine_done_notify); diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c index 4b2d7caf2d..84e0ca14ac 100644 --- a/hw/pci-host/astro.c +++ b/hw/pci-host/astro.c @@ -345,6 +345,10 @@ static AddressSpace *elroy_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->astro->iommu_as; } +static const PCIIOMMUOps elroy_pcihost_iommu_ops = { + .get_address_space = elroy_pcihost_set_iommu, +}; + /* * Encoding in IOSAPIC: * base_addr == 0xfffa0000, we want to get 0xa0ff0000. @@ -834,7 +838,7 @@ static void astro_realize(DeviceState *obj, Error **errp) &elroy->pci_io); /* Host memory as seen from the PCI side, via the IOMMU. */ - pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, elroy_pcihost_set_iommu, + pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, &elroy_pcihost_iommu_ops, elroy); } } diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c index 6f5442f108..f477f97847 100644 --- a/hw/pci-host/designware.c +++ b/hw/pci-host/designware.c @@ -663,6 +663,10 @@ static AddressSpace *designware_pcie_host_set_iommu(PCIBus *bus, void *opaque, return &s->pci.address_space; } +static const PCIIOMMUOps designware_iommu_ops = { + .get_address_space = designware_pcie_host_set_iommu, +}; + static void designware_pcie_host_realize(DeviceState *dev, Error **errp) { PCIHostState *pci = PCI_HOST_BRIDGE(dev); @@ -705,7 +709,7 @@ static void designware_pcie_host_realize(DeviceState *dev, Error **errp) address_space_init(&s->pci.address_space, &s->pci.address_space_root, "pcie-bus-address-space"); - pci_setup_iommu(pci->bus, designware_pcie_host_set_iommu, s); + pci_setup_iommu(pci->bus, &designware_iommu_ops, s); qdev_realize(DEVICE(&s->root), BUS(pci->bus), &error_fatal); } diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c index 82503229fa..5b0947a16c 100644 --- a/hw/pci-host/dino.c +++ b/hw/pci-host/dino.c @@ -354,6 +354,10 @@ static AddressSpace *dino_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->bm_as; } +static const PCIIOMMUOps dino_iommu_ops = { + .get_address_space = dino_pcihost_set_iommu, +}; + /* * Dino interrupts are connected as shown on Page 78, Table 23 * (Little-endian bit numbers) @@ -481,7 +485,7 @@ static void dino_pcihost_init(Object *obj) g_free(name); } - pci_setup_iommu(phb->bus, dino_pcihost_set_iommu, s); + pci_setup_iommu(phb->bus, &dino_iommu_ops, s); sysbus_init_mmio(sbd, &s->this_mem); diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index c5e58f4086..2a74dbe45f 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -968,6 +968,10 @@ static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &ds->dma_as; } +static PCIIOMMUOps pnv_phb3_iommu_ops = { + .get_address_space = pnv_phb3_dma_iommu, +}; + static void pnv_phb3_instance_init(Object *obj) { PnvPHB3 *phb = PNV_PHB3(obj); @@ -1012,7 +1016,7 @@ void pnv_phb3_bus_init(DeviceState *dev, PnvPHB3 *phb) object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id, &error_abort); - pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); + pci_setup_iommu(pci->bus, &pnv_phb3_iommu_ops, phb); } static void pnv_phb3_realize(DeviceState *dev, Error **errp) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 29cb11a5d9..37c7afc18c 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1518,6 +1518,10 @@ static void pnv_phb4_xscom_realize(PnvPHB4 *phb) &phb->phb_regs_mr); } +static PCIIOMMUOps pnv_phb4_iommu_ops = { + .get_address_space = pnv_phb4_dma_iommu, +}; + static void pnv_phb4_instance_init(Object *obj) { PnvPHB4 *phb = PNV_PHB4(obj); @@ -1557,7 +1561,7 @@ void pnv_phb4_bus_init(DeviceState *dev, PnvPHB4 *phb) object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id, &error_abort); - pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb); + pci_setup_iommu(pci->bus, &pnv_phb4_iommu_ops, phb); pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE; } diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 38814247f2..453a4e6ed3 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -435,6 +435,10 @@ static AddressSpace *e500_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->bm_as; } +static const PCIIOMMUOps ppce500_iommu_ops = { + .get_address_space = e500_pcihost_set_iommu, +}; + static void e500_pcihost_realize(DeviceState *dev, Error **errp) { SysBusDevice *sbd = SYS_BUS_DEVICE(dev); @@ -469,7 +473,7 @@ static void e500_pcihost_realize(DeviceState *dev, Error **errp) memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX); memory_region_add_subregion(&s->bm, 0x0, &s->busmem); address_space_init(&s->bm_as, &s->bm, "pci-bm"); - pci_setup_iommu(b, e500_pcihost_set_iommu, s); + pci_setup_iommu(b, &ppce500_iommu_ops, s); pci_create_simple(b, 0, "e500-host-bridge"); diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 9a11ac4b2b..86c3a49087 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -223,6 +223,10 @@ static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque, return &s->bm_as; } +static const PCIIOMMUOps raven_iommu_ops = { + .get_address_space = raven_pcihost_set_iommu, +}; + static void raven_change_gpio(void *opaque, int n, int level) { PREPPCIState *s = opaque; @@ -320,7 +324,7 @@ static void raven_pcihost_initfn(Object *obj) memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias); memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias); address_space_init(&s->bm_as, &s->bm, "raven-bm"); - pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); + pci_setup_iommu(&s->pci_bus, &raven_iommu_ops, s); h->bus = &s->pci_bus; diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c index dcb2e230b6..d0851b48b0 100644 --- a/hw/pci-host/sabre.c +++ b/hw/pci-host/sabre.c @@ -112,6 +112,10 @@ static AddressSpace *sabre_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } +static const PCIIOMMUOps sabre_iommu_ops = { + .get_address_space = sabre_pci_dma_iommu, +}; + static void sabre_config_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { @@ -384,7 +388,7 @@ static void sabre_realize(DeviceState *dev, Error **errp) /* IOMMU */ memory_region_add_subregion_overlap(&s->sabre_config, 0x200, sysbus_mmio_get_region(SYS_BUS_DEVICE(s->iommu), 0), 1); - pci_setup_iommu(phb->bus, sabre_pci_dma_iommu, s->iommu); + pci_setup_iommu(phb->bus, &sabre_iommu_ops, s->iommu); /* APB secondary busses */ pci_dev = pci_new_multifunction(PCI_DEVFN(1, 0), TYPE_SIMBA_PCI_BRIDGE); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 885c04b6f5..c49417abb2 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2678,7 +2678,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) PCIBus *iommu_bus = bus; uint8_t devfn = dev->devfn; - while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) { + while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); /* @@ -2717,15 +2717,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) { - return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); + if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { + return iommu_bus->iommu_ops->get_address_space(bus, + iommu_bus->iommu_opaque, devfn); } return &address_space_memory; } -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque) +void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { - bus->iommu_fn = fn; + /* + * If called, pci_setup_iommu() should provide a minimum set of + * useful callbacks for the bus. + */ + assert(ops); + assert(ops->get_address_space); + + bus->iommu_ops = ops; bus->iommu_opaque = opaque; } diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c index 672090de94..df4ee374d0 100644 --- a/hw/ppc/ppc440_pcix.c +++ b/hw/ppc/ppc440_pcix.c @@ -449,6 +449,10 @@ static AddressSpace *ppc440_pcix_set_iommu(PCIBus *b, void *opaque, int devfn) return &s->bm_as; } +static const PCIIOMMUOps ppc440_iommu_ops = { + .get_address_space = ppc440_pcix_set_iommu, +}; + /* * Some guests on sam460ex write all kinds of garbage here such as * missing enable bit and low bits set and still expect this to work @@ -503,7 +507,7 @@ static void ppc440_pcix_realize(DeviceState *dev, Error **errp) memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX); memory_region_add_subregion(&s->bm, 0x0, &s->busmem); address_space_init(&s->bm_as, &s->bm, "pci-bm"); - pci_setup_iommu(h->bus, ppc440_pcix_set_iommu, s); + pci_setup_iommu(h->bus, &ppc440_iommu_ops, s); memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE); memory_region_init_io(&h->conf_mem, OBJECT(s), &ppc440_pcix_host_conf_ops, diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 370c5a90f2..a27024e45a 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -780,6 +780,10 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &phb->iommu_as; } +static const PCIIOMMUOps spapr_iommu_ops = { + .get_address_space = spapr_pci_dma_iommu, +}; + static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) { g_autofree char *path = NULL; @@ -1978,7 +1982,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW, &sphb->msiwindow); - pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb); + pci_setup_iommu(bus, &spapr_iommu_ops, sphb); pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq); diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 9016720547..f283f7e38d 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -18,14 +18,112 @@ */ #include "qemu/osdep.h" +#include <sys/ioctl.h> #include <linux/vfio.h> #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" #include "hw/pci/pci_device.h" -#include "hw/vfio/vfio.h" +#include "hw/vfio/vfio-common.h" #include "qemu/error-report.h" +/* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +static bool vfio_eeh_container_ok(VFIOContainer *container) +{ + /* + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO + * implementation is broken if there are multiple groups in a + * container. The hardware works in units of Partitionable + * Endpoints (== IOMMU groups) and the EEH operations naively + * iterate across all groups in the container, without any logic + * to make sure the groups have their state synchronized. For + * certain operations (ENABLE) that might be ok, until an error + * occurs, but for others (GET_STATE) it's clearly broken. + */ + + /* + * XXX Once fixed kernels exist, test for them here + */ + + if (QLIST_EMPTY(&container->group_list)) { + return false; + } + + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { + return false; + } + + return true; +} + +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +{ + struct vfio_eeh_pe_op pe_op = { + .argsz = sizeof(pe_op), + .op = op, + }; + int ret; + + if (!vfio_eeh_container_ok(container)) { + error_report("vfio/eeh: EEH_PE_OP 0x%x: " + "kernel requires a container with exactly one group", op); + return -EPERM; + } + + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); + if (ret < 0) { + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); + return -errno; + } + + return ret; +} + +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +{ + VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOContainer *container = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + + container = QLIST_FIRST(&space->containers); + + if (QLIST_NEXT(container, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ + container = NULL; + goto out; + } + +out: + vfio_put_address_space(space); + return container; +} + +static bool vfio_eeh_as_ok(AddressSpace *as) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + return (container != NULL) && vfio_eeh_container_ok(container); +} + +static int vfio_eeh_as_op(AddressSpace *as, uint32_t op) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + if (!container) { + return -ENODEV; + } + return vfio_eeh_container_op(container, op); +} + bool spapr_phb_eeh_available(SpaprPhbState *sphb) { return vfio_eeh_as_ok(&sphb->iommu_as); diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c index 1391dd712c..7c56aad0fc 100644 --- a/hw/remote/iommu.c +++ b/hw/remote/iommu.c @@ -100,6 +100,10 @@ static void remote_iommu_finalize(Object *obj) iommu->elem_by_devfn = NULL; } +static const PCIIOMMUOps remote_iommu_ops = { + .get_address_space = remote_iommu_find_add_as, +}; + void remote_iommu_setup(PCIBus *pci_bus) { RemoteIommu *iommu = NULL; @@ -108,7 +112,7 @@ void remote_iommu_setup(PCIBus *pci_bus) iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU)); - pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu); + pci_setup_iommu(pci_bus, &remote_iommu_ops, iommu); object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu)); diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 2ca36f9f3b..347580ebac 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -652,6 +652,10 @@ static AddressSpace *s390_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &iommu->as; } +static const PCIIOMMUOps s390_iommu_ops = { + .get_address_space = s390_pci_dma_iommu, +}; + static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set) { uint8_t expected, actual; @@ -839,7 +843,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) b = pci_register_root_bus(dev, NULL, s390_pci_set_irq, s390_pci_map_irq, NULL, get_system_memory(), get_system_io(), 0, 64, TYPE_PCI_BUS); - pci_setup_iommu(b, s390_pci_dma_iommu, s); + pci_setup_iommu(b, &s390_iommu_ops, s); bus = BUS(b); qbus_set_hotplug_handler(bus, OBJECT(dev)); @@ -1058,7 +1062,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, pdev = PCI_DEVICE(dev); pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq); - pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s); + pci_setup_iommu(&pb->sec_bus, &s390_iommu_ops, s); qbus_set_hotplug_handler(BUS(&pb->sec_bus), OBJECT(s)); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 5f257bffb9..bbf69ff55a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -14,7 +14,6 @@ #include <linux/vfio.h> #include <sys/ioctl.h> #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" #include "qemu/error-report.h" diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 6623ae237b..d857bb8d0f 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -20,7 +20,6 @@ #include <sys/ioctl.h> #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" #include "hw/s390x/vfio-ccw.h" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index d806057b40..e70fdf5e0c 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -26,7 +26,6 @@ #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/pci.h" #include "exec/address-spaces.h" #include "exec/memory.h" @@ -246,44 +245,6 @@ bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return true; } -void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova, uint64_t iova_pgsizes) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - min_iova, - max_iova - min_iova + 1)) { - hw_error("%s: Overlapped IOMMU are not enabled", __func__); - } - } - - hostwin = g_malloc0(sizeof(*hostwin)); - - hostwin->min_iova = min_iova; - hostwin->max_iova = max_iova; - hostwin->iova_pgsizes = iova_pgsizes; - QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); -} - -int vfio_host_win_del(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - return 0; - } - } - - return -1; -} - static bool vfio_listener_skipped_section(MemoryRegionSection *section) { return (!memory_region_is_ram(section->mr) && @@ -532,22 +493,6 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, g_free(vrdl); } -static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, - hwaddr iova, hwaddr end) -{ - VFIOHostDMAWindow *hostwin; - bool hostwin_found = false; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { - hostwin_found = true; - break; - } - } - - return hostwin_found ? hostwin : NULL; -} - static bool vfio_known_safe_misalignment(MemoryRegionSection *section) { MemoryRegion *mr = section->mr; @@ -626,7 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, Int128 llend, llsize; void *vaddr; int ret; - VFIOHostDMAWindow *hostwin; Error *err = NULL; if (!vfio_listener_valid_section(section, "region_add")) { @@ -648,13 +592,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - hostwin = vfio_find_hostwin(container, iova, end); - if (!hostwin) { - error_setg(&err, "Container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end); - goto fail; - } - memory_region_ref(section->mr); if (memory_region_is_iommu(section->mr)) { @@ -693,6 +630,15 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } + if (container->iova_ranges) { + ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, + container->iova_ranges, &err); + if (ret) { + g_free(giommu); + goto fail; + } + } + ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, &err); if (ret) { @@ -726,7 +672,7 @@ static void vfio_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { trace_vfio_listener_region_add_no_dma_map( @@ -825,12 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, if (memory_region_is_ram_device(section->mr)) { hwaddr pgmask; - VFIOHostDMAWindow *hostwin; - - hostwin = vfio_find_hostwin(container, iova, end); - assert(hostwin); /* or region_add() would have failed */ - pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + pgmask = (1ULL << ctz64(container->pgsizes)) - 1; try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); } else if (memory_region_has_ram_discard_manager(section->mr)) { vfio_unregister_ram_discard_listener(container, section); diff --git a/hw/vfio/container.c b/hw/vfio/container.c index adc467210f..242010036a 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -20,20 +20,15 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#ifdef CONFIG_KVM -#include <linux/kvm.h> -#endif #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "exec/address-spaces.h" #include "exec/memory.h" #include "exec/ram_addr.h" #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/range.h" -#include "sysemu/kvm.h" #include "sysemu/reset.h" #include "trace.h" #include "qapi/error.h" @@ -205,92 +200,6 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, return -errno; } -int vfio_container_add_section_window(VFIOContainer *container, - MemoryRegionSection *section, - Error **errp) -{ - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; - - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return 0; - } - - /* For now intersections are not allowed, we may relax this later */ - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, - int128_get64(section->size))) { - error_setg(errp, - "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" - "host DMA window [0x%"PRIx64",0x%"PRIx64"]", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, - hostwin->min_iova, hostwin->max_iova); - return -EINVAL; - } - } - - ret = vfio_spapr_create_window(container, section, &pgsize); - if (ret) { - error_setg_errno(errp, -ret, "Failed to create SPAPR window"); - return ret; - } - - vfio_host_win_add(container, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); -#ifdef CONFIG_KVM - if (kvm_enabled()) { - VFIOGroup *group; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - struct kvm_vfio_spapr_tce param; - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, - .addr = (uint64_t)(unsigned long)¶m, - }; - - if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, - ¶m.tablefd)) { - QLIST_FOREACH(group, &container->group_list, container_next) { - param.groupfd = group->fd; - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_setg_errno(errp, errno, - "vfio: failed GROUP_SET_SPAPR_TCE for " - "KVM VFIO device %d and group fd %d", - param.tablefd, param.groupfd); - return -errno; - } - trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); - } - } - } -#endif - return 0; -} - -void vfio_container_del_section_window(VFIOContainer *container, - MemoryRegionSection *section) -{ - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; - } - - vfio_spapr_remove_window(container, - section->offset_within_address_space); - if (vfio_host_win_del(container, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { - hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, - __func__, section->offset_within_address_space); - } -} - int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) { int ret; @@ -355,14 +264,6 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, return ret; } -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - memory_listener_unregister(&container->prereg_listener); - } -} - static struct vfio_info_cap_header * vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) { @@ -382,7 +283,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, /* If the capability cannot be found, assume no DMA limiting */ hdr = vfio_get_iommu_type1_info_cap(info, VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); - if (hdr == NULL) { + if (!hdr) { return false; } @@ -394,6 +295,32 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + VFIOContainer *container) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; + + hdr = vfio_get_iommu_type1_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + if (!hdr) { + return false; + } + + cap = (void *)hdr; + + for (int i = 0; i < cap->nr_iovas; i++) { + Range *range = g_new(Range, 1); + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); + container->iova_ranges = + range_list_insert(container->iova_ranges, range); + } + + return true; +} + static void vfio_kvm_device_add_group(VFIOGroup *group) { Error *err = NULL; @@ -535,6 +462,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, } } +static void vfio_free_container(VFIOContainer *container) +{ + g_list_free_full(container->iova_ranges, g_free); + g_free(container); +} + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, Error **errp) { @@ -616,8 +549,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->error = NULL; container->dirty_pages_supported = false; container->dma_max_mappings = 0; + container->iova_ranges = NULL; QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); ret = vfio_init_container(container, group->fd, errp); @@ -652,84 +585,21 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { container->dma_max_mappings = 65535; } - vfio_get_iommu_info_migration(container, info); - g_free(info); - /* - * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE - * information to get the actual window extent rather than assume - * a 64-bit IOVA address space. - */ - vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes); + vfio_get_info_iova_range(info, container); + vfio_get_iommu_info_migration(container, info); + g_free(info); break; } case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: { - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called - * when container fd is closed so we do not call it explicitly - * in this file. - */ - if (!v2) { - ret = ioctl(fd, VFIO_IOMMU_ENABLE); - if (ret) { - error_setg_errno(errp, errno, "failed to enable container"); - ret = -errno; - goto enable_discards_exit; - } - } else { - container->prereg_listener = vfio_prereg_listener; - - memory_listener_register(&container->prereg_listener, - &address_space_memory); - if (container->error) { - memory_listener_unregister(&container->prereg_listener); - ret = -1; - error_propagate_prepend(errp, container->error, - "RAM memory listener initialization failed: "); - goto enable_discards_exit; - } - } - - info.argsz = sizeof(info); - ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + ret = vfio_spapr_container_init(container, errp); if (ret) { - error_setg_errno(errp, errno, - "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); - ret = -errno; - if (v2) { - memory_listener_unregister(&container->prereg_listener); - } goto enable_discards_exit; } - - if (v2) { - container->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. - * To make region_add/del simpler, we better remove this - * window now and let those iommu_listener callbacks - * create/remove them when needed. - */ - ret = vfio_spapr_remove_window(container, info.dma32_window_start); - if (ret) { - error_setg_errno(errp, -ret, - "failed to remove existing window"); - goto enable_discards_exit; - } - } else { - /* The default table uses 4K pages */ - container->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); - } + break; } } @@ -759,13 +629,17 @@ listener_release_exit: QLIST_REMOVE(group, container_next); QLIST_REMOVE(container, next); vfio_kvm_device_del_group(group); - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } enable_discards_exit: vfio_ram_block_discard_disable(container, false); free_container_exit: - g_free(container); + vfio_free_container(container); close_fd_exit: close(fd); @@ -789,7 +663,11 @@ static void vfio_disconnect_container(VFIOGroup *group) * group. */ if (QLIST_EMPTY(&container->group_list)) { - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } } if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { @@ -800,7 +678,6 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; - VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -811,15 +688,9 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - } - trace_vfio_disconnect_container(container->fd); close(container->fd); - g_free(container); + vfio_free_container(container); vfio_put_address_space(space); } @@ -975,103 +846,6 @@ static void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } -/* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -static bool vfio_eeh_container_ok(VFIOContainer *container) -{ - /* - * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO - * implementation is broken if there are multiple groups in a - * container. The hardware works in units of Partitionable - * Endpoints (== IOMMU groups) and the EEH operations naively - * iterate across all groups in the container, without any logic - * to make sure the groups have their state synchronized. For - * certain operations (ENABLE) that might be ok, until an error - * occurs, but for others (GET_STATE) it's clearly broken. - */ - - /* - * XXX Once fixed kernels exist, test for them here - */ - - if (QLIST_EMPTY(&container->group_list)) { - return false; - } - - if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { - return false; - } - - return true; -} - -static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) -{ - struct vfio_eeh_pe_op pe_op = { - .argsz = sizeof(pe_op), - .op = op, - }; - int ret; - - if (!vfio_eeh_container_ok(container)) { - error_report("vfio/eeh: EEH_PE_OP 0x%x: " - "kernel requires a container with exactly one group", op); - return -EPERM; - } - - ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); - if (ret < 0) { - error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); - return -errno; - } - - return ret; -} - -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) -{ - VFIOAddressSpace *space = vfio_get_address_space(as); - VFIOContainer *container = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - - container = QLIST_FIRST(&space->containers); - - if (QLIST_NEXT(container, next)) { - /* - * We don't yet have logic to synchronize EEH state across - * multiple containers - */ - container = NULL; - goto out; - } - -out: - vfio_put_address_space(space); - return container; -} - -bool vfio_eeh_as_ok(AddressSpace *as) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - return (container != NULL) && vfio_eeh_container_ok(container); -} - -int vfio_eeh_as_op(AddressSpace *as, uint32_t op) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - if (!container) { - return -ENODEV; - } - return vfio_eeh_container_op(container, op); -} - static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) { char *tmp, group_path[PATH_MAX], *group_name; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 7e5da21b31..168847e7c5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -23,7 +23,6 @@ #include <sys/ioctl.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/hw.h" #include "trace.h" #include "qapi/error.h" diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index b27011cee7..c62c02f7b6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3081,7 +3081,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) struct stat st; int i, ret; bool is_mdev; - char uuid[UUID_FMT_LEN]; + char uuid[UUID_STR_LEN]; char *name; if (!vbasedev->sysfsdev) { diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 9ec1e95f6d..83da2f7ec2 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -11,6 +11,11 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> #include <linux/vfio.h> +#ifdef CONFIG_KVM +#include <linux/kvm.h> +#endif +#include "sysemu/kvm.h" +#include "exec/address-spaces.h" #include "hw/vfio/vfio-common.h" #include "hw/hw.h" @@ -135,15 +140,90 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener, trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0); } -const MemoryListener vfio_prereg_listener = { +static const MemoryListener vfio_prereg_listener = { .name = "vfio-pre-reg", .region_add = vfio_prereg_listener_region_add, .region_del = vfio_prereg_listener_region_del, }; -int vfio_spapr_create_window(VFIOContainer *container, - MemoryRegionSection *section, - hwaddr *pgsize) +static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + min_iova, + max_iova - min_iova + 1)) { + hw_error("%s: Overlapped IOMMU are not enabled", __func__); + } + } + + hostwin = g_malloc0(sizeof(*hostwin)); + + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; + QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); +} + +static int vfio_host_win_del(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + return 0; + } + } + + return -1; +} + +static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, + hwaddr iova, hwaddr end) +{ + VFIOHostDMAWindow *hostwin; + bool hostwin_found = false; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { + hostwin_found = true; + break; + } + } + + return hostwin_found ? hostwin : NULL; +} + +static int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space) +{ + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + .start_addr = offset_within_address_space, + }; + int ret; + + ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + if (ret) { + error_report("Failed to remove window at %"PRIx64, + (uint64_t)remove.start_addr); + return -errno; + } + + trace_vfio_spapr_remove_window(offset_within_address_space); + + return 0; +} + +static int vfio_spapr_create_window(VFIOContainer *container, + MemoryRegionSection *section, + hwaddr *pgsize) { int ret = 0; IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); @@ -233,23 +313,195 @@ int vfio_spapr_create_window(VFIOContainer *container, return 0; } -int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space) +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp) { - struct vfio_iommu_spapr_tce_remove remove = { - .argsz = sizeof(remove), - .start_addr = offset_within_address_space, - }; + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; int ret; - ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + /* + * VFIO_SPAPR_TCE_IOMMU supports a single host window between + * [dma32_window_start, dma32_window_size), we need to ensure + * the section fall in this range. + */ + if (container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + hwaddr iova, end; + + iova = section->offset_within_address_space; + end = iova + int128_get64(section->size) - 1; + + if (!vfio_find_hostwin(container, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); + return -EINVAL; + } + return 0; + } + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return 0; + } + + /* For now intersections are not allowed, we may relax this later */ + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, + int128_get64(section->size))) { + error_setg(errp, + "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" + "host DMA window [0x%"PRIx64",0x%"PRIx64"]", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, + hostwin->min_iova, hostwin->max_iova); + return -EINVAL; + } + } + + ret = vfio_spapr_create_window(container, section, &pgsize); if (ret) { - error_report("Failed to remove window at %"PRIx64, - (uint64_t)remove.start_addr); - return -errno; + error_setg_errno(errp, -ret, "Failed to create SPAPR window"); + return ret; } - trace_vfio_spapr_remove_window(offset_within_address_space); + vfio_host_win_add(container, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); +#ifdef CONFIG_KVM + if (kvm_enabled()) { + VFIOGroup *group; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + struct kvm_vfio_spapr_tce param; + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, + .addr = (uint64_t)(unsigned long)¶m, + }; + + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, + ¶m.tablefd)) { + QLIST_FOREACH(group, &container->group_list, container_next) { + param.groupfd = group->fd; + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_setg_errno(errp, errno, + "vfio: failed GROUP_SET_SPAPR_TCE for " + "KVM VFIO device %d and group fd %d", + param.tablefd, param.groupfd); + return -errno; + } + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); + } + } + } +#endif + return 0; +} + +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section) +{ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } + + vfio_spapr_remove_window(container, + section->offset_within_address_space); + if (vfio_host_win_del(container, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { + hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, + __func__, section->offset_within_address_space); + } +} + +int vfio_spapr_container_init(VFIOContainer *container, Error **errp) +{ + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + + QLIST_INIT(&container->hostwin_list); + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + if (!v2) { + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_setg_errno(errp, errno, "failed to enable container"); + return -errno; + } + } else { + container->prereg_listener = vfio_prereg_listener; + + memory_listener_register(&container->prereg_listener, + &address_space_memory); + if (container->error) { + ret = -1; + error_propagate_prepend(errp, container->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } + } + + info.argsz = sizeof(info); + ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, errno, + "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); + ret = -errno; + goto listener_unregister_exit; + } + + if (v2) { + container->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. + * To make region_add/del simpler, we better remove this + * window now and let those iommu_listener callbacks + * create/remove them when needed. + */ + ret = vfio_spapr_remove_window(container, info.dma32_window_start); + if (ret) { + error_setg_errno(errp, -ret, + "failed to remove existing window"); + goto listener_unregister_exit; + } + } else { + /* The default table uses 4K pages */ + container->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); + } return 0; + +listener_unregister_exit: + if (v2) { + memory_listener_unregister(&container->prereg_listener); + } + return ret; +} + +void vfio_spapr_container_deinit(VFIOContainer *container) +{ + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 0af7a2886c..637cac4edf 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -135,6 +135,7 @@ virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 +virtio_iommu_host_resv_regions(const char *name, uint32_t index, uint64_t lob, uint64_t upb) "mr=%s host-resv-reg[%d] = [0x%"PRIx64",0x%"PRIx64"]" # virtio-mem.c virtio_mem_send_response(uint16_t type) "type=%" PRIu16 diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 7ef2f9dcdb..9459fbf6ed 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -37,7 +37,7 @@ struct VirtIOIOMMUPCI { static Property virtio_iommu_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), DEFINE_PROP_ARRAY("reserved-regions", VirtIOIOMMUPCI, - vdev.nb_reserved_regions, vdev.reserved_regions, + vdev.nr_prop_resv_regions, vdev.prop_resv_regions, qdev_prop_reserved_region, ReservedRegion), DEFINE_PROP_END_OF_LIST(), }; @@ -54,9 +54,9 @@ static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) "for the virtio-iommu-pci device"); return; } - for (int i = 0; i < s->nb_reserved_regions; i++) { - if (s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && - s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) { + for (int i = 0; i < s->nr_prop_resv_regions; i++) { + if (s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && + s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) { error_setg(errp, "reserved region %d has an invalid type", i); error_append_hint(errp, "Valid values are 0 and 1\n"); return; diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index be51635895..89fb5767d1 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -20,12 +20,15 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qemu/iov.h" +#include "qemu/range.h" +#include "qemu/reserved-region.h" #include "exec/target_page.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" +#include "qemu/reserved-region.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "trace.h" @@ -378,6 +381,19 @@ static void virtio_iommu_put_domain(gpointer data) g_free(domain); } +static void add_prop_resv_regions(IOMMUDevice *sdev) +{ + VirtIOIOMMU *s = sdev->viommu; + int i; + + for (i = 0; i < s->nr_prop_resv_regions; i++) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + + *reg = s->prop_resv_regions[i]; + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + } +} + static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, int devfn) { @@ -408,6 +424,7 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); + add_prop_resv_regions(sdev); /* * Build the IOMMU disabled container with aliases to the @@ -444,6 +461,10 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static const PCIIOMMUOps virtio_iommu_ops = { + .get_address_space = virtio_iommu_find_add_as, +}; + static int virtio_iommu_attach(VirtIOIOMMU *s, struct virtio_iommu_req_attach *req) { @@ -624,29 +645,30 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s, return ret; } -static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep, +static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep, uint8_t *buf, size_t free) { struct virtio_iommu_probe_resv_mem prop = {}; size_t size = sizeof(prop), length = size - sizeof(prop.head), total; - int i; - - total = size * s->nb_reserved_regions; + GList *l; + total = size * g_list_length(sdev->resv_regions); if (total > free) { return -ENOSPC; } - for (i = 0; i < s->nb_reserved_regions; i++) { - unsigned subtype = s->reserved_regions[i].type; + for (l = sdev->resv_regions; l; l = l->next) { + ReservedRegion *reg = l->data; + unsigned subtype = reg->type; + Range *range = ®->range; assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED || subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI); prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM); prop.head.length = cpu_to_le16(length); prop.subtype = subtype; - prop.start = cpu_to_le64(s->reserved_regions[i].low); - prop.end = cpu_to_le64(s->reserved_regions[i].high); + prop.start = cpu_to_le64(range_lob(range)); + prop.end = cpu_to_le64(range_upb(range)); memcpy(buf, &prop, size); @@ -666,19 +688,27 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, uint8_t *buf) { uint32_t ep_id = le32_to_cpu(req->endpoint); + IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id); size_t free = VIOMMU_PROBE_SIZE; + IOMMUDevice *sdev; ssize_t count; - if (!virtio_iommu_mr(s, ep_id)) { + if (!iommu_mr) { return VIRTIO_IOMMU_S_NOENT; } - count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free); + sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr); + if (!sdev) { + return -EINVAL; + } + + count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free); if (count < 0) { return VIRTIO_IOMMU_S_INVAL; } buf += count; free -= count; + sdev->probe_done = true; return VIRTIO_IOMMU_S_OK; } @@ -856,7 +886,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, bool bypass_allowed; int granule; bool found; - int i; + GList *l; interval.low = addr; interval.high = addr + 1; @@ -894,10 +924,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, goto unlock; } - for (i = 0; i < s->nb_reserved_regions; i++) { - ReservedRegion *reg = &s->reserved_regions[i]; + for (l = sdev->resv_regions; l; l = l->next) { + ReservedRegion *reg = l->data; - if (addr >= reg->low && addr <= reg->high) { + if (range_contains(®->range, addr)) { switch (reg->type) { case VIRTIO_IOMMU_RESV_MEM_T_MSI: entry.perm = flag; @@ -1131,6 +1161,106 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, return 0; } +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(®->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(®->range), + range_upb(®->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +/** + * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges + * + * The function turns those into reserved ranges. Once some + * reserved ranges have been set, new reserved regions cannot be + * added outside of the original ones. + * + * @mr: IOMMU MR + * @iova_ranges: list of usable IOVA ranges + * @errp: error handle + */ +static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, + GList *iova_ranges, + Error **errp) +{ + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); + GList *current_ranges = sdev->host_resv_ranges; + GList *l, *tmp, *new_ranges = NULL; + int ret = -EINVAL; + + /* check that each new resv region is included in an existing one */ + if (sdev->host_resv_ranges) { + range_inverse_array(iova_ranges, + &new_ranges, + 0, UINT64_MAX); + + for (tmp = new_ranges; tmp; tmp = tmp->next) { + Range *newr = (Range *)tmp->data; + bool included = false; + + for (l = current_ranges; l; l = l->next) { + Range * r = (Range *)l->data; + + if (range_contains_range(r, newr)) { + included = true; + break; + } + } + if (!included) { + goto error; + } + } + /* all new reserved ranges are included in existing ones */ + ret = 0; + goto out; + } + + if (sdev->probe_done) { + warn_report("%s: Notified about new host reserved regions after probe", + mr->parent_obj.name); + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +error: + error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", + mr->parent_obj.name); +out: + g_list_free_full(new_ranges, g_free); + return ret; +} + static void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1206,7 +1336,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); if (s->primary_bus) { - pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s); + pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); } @@ -1426,6 +1556,7 @@ static void virtio_iommu_memory_region_class_init(ObjectClass *klass, imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; + imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { |