diff options
Diffstat (limited to 'hw')
| -rw-r--r-- | hw/i386/acpi-build.c | 127 | ||||
| -rw-r--r-- | hw/misc/grlib_ahb_apb_pnp.c | 12 | ||||
| -rw-r--r-- | hw/pci/pci.c | 43 | ||||
| -rw-r--r-- | hw/rdma/rdma_backend.c | 13 | ||||
| -rw-r--r-- | hw/rdma/rdma_backend.h | 5 | ||||
| -rw-r--r-- | hw/rdma/rdma_rm.c | 5 | ||||
| -rw-r--r-- | hw/rdma/vmw/pvrdma_main.c | 6 | ||||
| -rw-r--r-- | hw/virtio/virtio-bus.c | 4 | ||||
| -rw-r--r-- | hw/virtio/virtio.c | 9 |
9 files changed, 213 insertions, 11 deletions
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 266d9b534b..12ff55fcfb 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2517,12 +2517,105 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) */ #define IOAPIC_SB_DEVID (uint64_t)PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0)) +/* + * Insert IVHD entry for device and recurse, insert alias, or insert range as + * necessary for the PCI topology. + */ +static void +insert_ivhd(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + GArray *table_data = opaque; + uint32_t entry; + + /* "Select" IVHD entry, type 0x2 */ + entry = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn) << 8 | 0x2; + build_append_int_noprefix(table_data, entry, 4); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t sec = pci_bus_num(sec_bus); + uint8_t sub = dev->config[PCI_SUBORDINATE_BUS]; + + if (pci_bus_is_express(sec_bus)) { + /* + * Walk the bus if there are subordinates, otherwise use a range + * to cover an entire leaf bus. We could potentially also use a + * range for traversed buses, but we'd need to take care not to + * create both Select and Range entries covering the same device. + * This is easier and potentially more compact. + * + * An example bare metal system seems to use Select entries for + * root ports without a slot (ie. built-ins) and Range entries + * when there is a slot. The same system also only hard-codes + * the alias range for an onboard PCIe-to-PCI bridge, apparently + * making no effort to support nested bridges. We attempt to + * be more thorough here. + */ + if (sec == sub) { /* leaf bus */ + /* "Start of Range" IVHD entry, type 0x3 */ + entry = PCI_BUILD_BDF(sec, PCI_DEVFN(0, 0)) << 8 | 0x3; + build_append_int_noprefix(table_data, entry, 4); + /* "End of Range" IVHD entry, type 0x4 */ + entry = PCI_BUILD_BDF(sub, PCI_DEVFN(31, 7)) << 8 | 0x4; + build_append_int_noprefix(table_data, entry, 4); + } else { + pci_for_each_device(sec_bus, sec, insert_ivhd, table_data); + } + } else { + /* + * If the secondary bus is conventional, then we need to create an + * Alias range for everything downstream. The range covers the + * first devfn on the secondary bus to the last devfn on the + * subordinate bus. The alias target depends on legacy versus + * express bridges, just as in pci_device_iommu_address_space(). + * DeviceIDa vs DeviceIDb as per the AMD IOMMU spec. + */ + uint16_t dev_id_a, dev_id_b; + + dev_id_a = PCI_BUILD_BDF(sec, PCI_DEVFN(0, 0)); + + if (pci_is_express(dev) && + pcie_cap_get_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) { + dev_id_b = dev_id_a; + } else { + dev_id_b = PCI_BUILD_BDF(pci_bus_num(bus), dev->devfn); + } + + /* "Alias Start of Range" IVHD entry, type 0x43, 8 bytes */ + build_append_int_noprefix(table_data, dev_id_a << 8 | 0x43, 4); + build_append_int_noprefix(table_data, dev_id_b << 8 | 0x0, 4); + + /* "End of Range" IVHD entry, type 0x4 */ + entry = PCI_BUILD_BDF(sub, PCI_DEVFN(31, 7)) << 8 | 0x4; + build_append_int_noprefix(table_data, entry, 4); + } + } +} + +/* For all PCI host bridges, walk and insert IVHD entries */ +static int +ivrs_host_bridges(Object *obj, void *opaque) +{ + GArray *ivhd_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus) { + pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob); + } + } + + return 0; +} + static void build_amd_iommu(GArray *table_data, BIOSLinker *linker) { - int ivhd_table_len = 28; + int ivhd_table_len = 24; int iommu_start = table_data->len; AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default()); + GArray *ivhd_blob = g_array_new(false, true, 1); /* IVRS header */ acpi_data_push(table_data, sizeof(AcpiTableHeader)); @@ -2544,12 +2637,34 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) 1); /* + * A PCI bus walk, for each PCI host bridge, is necessary to create a + * complete set of IVHD entries. Do this into a separate blob so that we + * can calculate the total IVRS table length here and then append the new + * blob further below. Fall back to an entry covering all devices, which + * is sufficient when no aliases are present. + */ + object_child_foreach_recursive(object_get_root(), + ivrs_host_bridges, ivhd_blob); + + if (!ivhd_blob->len) { + /* + * Type 1 device entry reporting all devices + * These are 4-byte device entries currently reporting the range of + * Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte) + */ + build_append_int_noprefix(ivhd_blob, 0x0000001, 4); + } + + ivhd_table_len += ivhd_blob->len; + + /* * When interrupt remapping is supported, we add a special IVHD device * for type IO-APIC. */ if (x86_iommu_ir_supported(x86_iommu_get_default())) { ivhd_table_len += 8; } + /* IVHD length */ build_append_int_noprefix(table_data, ivhd_table_len, 2); /* DeviceID */ @@ -2569,12 +2684,10 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) (1UL << 2) | /* GTSup */ (1UL << 6), /* GASup */ 4); - /* - * Type 1 device entry reporting all devices - * These are 4-byte device entries currently reporting the range of - * Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte) - */ - build_append_int_noprefix(table_data, 0x0000001, 4); + + /* IVHD entries as found above */ + g_array_append_vals(table_data, ivhd_blob->data, ivhd_blob->len); + g_array_free(ivhd_blob, TRUE); /* * Add a special IVHD device type. diff --git a/hw/misc/grlib_ahb_apb_pnp.c b/hw/misc/grlib_ahb_apb_pnp.c index 7338461694..e230e25363 100644 --- a/hw/misc/grlib_ahb_apb_pnp.c +++ b/hw/misc/grlib_ahb_apb_pnp.c @@ -22,6 +22,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/sysbus.h" #include "hw/misc/grlib_ahb_apb_pnp.h" @@ -231,9 +232,20 @@ static uint64_t grlib_apb_pnp_read(void *opaque, hwaddr offset, unsigned size) return apb_pnp->regs[offset >> 2]; } +static void grlib_apb_pnp_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); +} + static const MemoryRegionOps grlib_apb_pnp_ops = { .read = grlib_apb_pnp_read, + .write = grlib_apb_pnp_write, .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, }; static void grlib_apb_pnp_realize(DeviceState *dev, Error **errp) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index c68498c0de..cbc7a32568 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2646,12 +2646,49 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) { PCIBus *bus = pci_get_bus(dev); PCIBus *iommu_bus = bus; + uint8_t devfn = dev->devfn; - while(iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) { - iommu_bus = pci_get_bus(iommu_bus->parent_dev); + while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) { + PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); + + /* + * The requester ID of the provided device may be aliased, as seen from + * the IOMMU, due to topology limitations. The IOMMU relies on a + * requester ID to provide a unique AddressSpace for devices, but + * conventional PCI buses pre-date such concepts. Instead, the PCIe- + * to-PCI bridge creates and accepts transactions on behalf of down- + * stream devices. When doing so, all downstream devices are masked + * (aliased) behind a single requester ID. The requester ID used + * depends on the format of the bridge devices. Proper PCIe-to-PCI + * bridges, with a PCIe capability indicating such, follow the + * guidelines of chapter 2.3 of the PCIe-to-PCI/X bridge specification, + * where the bridge uses the seconary bus as the bridge portion of the + * requester ID and devfn of 00.0. For other bridges, typically those + * found on the root complex such as the dmi-to-pci-bridge, we follow + * the convention of typical bare-metal hardware, which uses the + * requester ID of the bridge itself. There are device specific + * exceptions to these rules, but these are the defaults that the + * Linux kernel uses when determining DMA aliases itself and believed + * to be true for the bare metal equivalents of the devices emulated + * in QEMU. + */ + if (!pci_bus_is_express(iommu_bus)) { + PCIDevice *parent = iommu_bus->parent_dev; + + if (pci_is_express(parent) && + pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) { + devfn = PCI_DEVFN(0, 0); + bus = iommu_bus; + } else { + devfn = parent->devfn; + bus = parent_bus; + } + } + + iommu_bus = parent_bus; } if (iommu_bus && iommu_bus->iommu_fn) { - return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, dev->devfn); + return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); } return &address_space_memory; } diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index c39051068d..c346407cd3 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -391,7 +391,11 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey; } +#ifdef LEGACY_RDMA_REG_MR dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start; +#else + dsge->addr = ssge[ssge_idx].addr; +#endif dsge->length = ssge[ssge_idx].length; dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr); @@ -735,10 +739,19 @@ void rdma_backend_destroy_pd(RdmaBackendPD *pd) } } +#ifdef LEGACY_RDMA_REG_MR int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr, size_t length, int access) +#else +int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr, + size_t length, uint64_t guest_start, int access) +#endif { +#ifdef LEGACY_RDMA_REG_MR mr->ibmr = ibv_reg_mr(pd->ibpd, addr, length, access); +#else + mr->ibmr = ibv_reg_mr_iova(pd->ibpd, addr, length, guest_start, access); +#endif if (!mr->ibmr) { rdma_error_report("ibv_reg_mr fail, errno=%d", errno); return -EIO; diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 7c1a19a2b5..127f96e2d5 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -78,8 +78,13 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev, int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd); void rdma_backend_destroy_pd(RdmaBackendPD *pd); +#ifdef LEGACY_RDMA_REG_MR int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr, size_t length, int access); +#else +int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr, + size_t length, uint64_t guest_start, int access); +#endif void rdma_backend_destroy_mr(RdmaBackendMR *mr); int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq, diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 1927f85472..1524dfaeaa 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -227,8 +227,13 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, mr->length = guest_length; mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1)); +#ifdef LEGACY_RDMA_REG_MR ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt, mr->length, access_flags); +#else + ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt, + mr->length, guest_start, access_flags); +#endif if (ret) { ret = -EIO; goto out_dealloc_mr; diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 3722d9e772..6f0fc405c7 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -664,6 +664,12 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; qemu_register_shutdown_notifier(&dev->shutdown_notifier); +#ifdef LEGACY_RDMA_REG_MR + rdma_info_report("Using legacy reg_mr"); +#else + rdma_info_report("Using iova reg_mr"); +#endif + out: if (rc) { pvrdma_fini(pdev); diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index b2c804292e..d6332d45c3 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -288,6 +288,10 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) k->ioeventfd_assign(proxy, notifier, n, false); } + if (r == 0) { + virtio_queue_set_host_notifier_enabled(vq, assign); + } + return r; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 762df12f4c..04716b5f6c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -128,6 +128,7 @@ struct VirtQueue VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; + bool host_notifier_enabled; QLIST_ENTRY(VirtQueue) node; }; @@ -2271,7 +2272,7 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) } trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); - if (vq->handle_aio_output) { + if (vq->host_notifier_enabled) { event_notifier_set(&vq->host_notifier); } else if (vq->handle_output) { vq->handle_output(vdev, vq); @@ -3145,6 +3146,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->vq[i].vector = VIRTIO_NO_VECTOR; vdev->vq[i].vdev = vdev; vdev->vq[i].queue_index = i; + vdev->vq[i].host_notifier_enabled = false; } vdev->name = name; @@ -3436,6 +3438,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) return &vq->host_notifier; } +void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) +{ + vq->host_notifier_enabled = enabled; +} + int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n, MemoryRegion *mr, bool assign) { |