From e4a8ae09c538880440ba866174b0015f147c8c9e Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Oct 2023 15:45:09 +0200 Subject: vfio: Collect container iova range info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collect iova range information if VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE capability is supported. This allows to propagate the information though the IOMMU MR set_iova_ranges() callback so that virtual IOMMUs get aware of those aperture constraints. This is only done if the info is available and the number of iova ranges is greater than 0. A new vfio_get_info_iova_range helper is introduced matching the coding style of existing vfio_get_info_dma_avail. The boolean returned value isn't used though. Code is aligned between both. Signed-off-by: Eric Auger Reviewed-by: Alex Williamson Tested-by: Yanghang Liu Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'hw/vfio/container.c') diff --git a/hw/vfio/container.c b/hw/vfio/container.c index adc467210f..fc88222377 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -382,7 +382,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, /* If the capability cannot be found, assume no DMA limiting */ hdr = vfio_get_iommu_type1_info_cap(info, VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); - if (hdr == NULL) { + if (!hdr) { return false; } @@ -394,6 +394,32 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + VFIOContainer *container) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; + + hdr = vfio_get_iommu_type1_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + if (!hdr) { + return false; + } + + cap = (void *)hdr; + + for (int i = 0; i < cap->nr_iovas; i++) { + Range *range = g_new(Range, 1); + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); + container->iova_ranges = + range_list_insert(container->iova_ranges, range); + } + + return true; +} + static void vfio_kvm_device_add_group(VFIOGroup *group) { Error *err = NULL; @@ -535,6 +561,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, } } +static void vfio_free_container(VFIOContainer *container) +{ + g_list_free_full(container->iova_ranges, g_free); + g_free(container); +} + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, Error **errp) { @@ -616,6 +648,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->error = NULL; container->dirty_pages_supported = false; container->dma_max_mappings = 0; + container->iova_ranges = NULL; QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); @@ -652,6 +685,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { container->dma_max_mappings = 65535; } + + vfio_get_info_iova_range(info, container); + vfio_get_iommu_info_migration(container, info); g_free(info); @@ -765,7 +801,7 @@ enable_discards_exit: vfio_ram_block_discard_disable(container, false); free_container_exit: - g_free(container); + vfio_free_container(container); close_fd_exit: close(fd); @@ -819,7 +855,7 @@ static void vfio_disconnect_container(VFIOGroup *group) trace_vfio_disconnect_container(container->fd); close(container->fd); - g_free(container); + vfio_free_container(container); vfio_put_address_space(space); } -- cgit 1.4.1 From 54876d25fe298e0761556253f1c07af6160d5c10 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 2 Nov 2023 15:12:22 +0800 Subject: vfio/container: Move IBM EEH related functions into spapr_pci_vfio.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With vfio_eeh_as_ok/vfio_eeh_as_op moved and made static, vfio.h becomes empty and is deleted. No functional changes intended. Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Acked-by: Eric Farman Reviewed-by: Cédric Le Goater Signed-off-by: Cédric Le Goater --- hw/ppc/spapr_pci_vfio.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++- hw/vfio/ap.c | 1 - hw/vfio/ccw.c | 1 - hw/vfio/common.c | 1 - hw/vfio/container.c | 98 ----------------------------------------------- hw/vfio/helpers.c | 1 - include/hw/vfio/vfio.h | 7 ---- 7 files changed, 99 insertions(+), 110 deletions(-) delete mode 100644 include/hw/vfio/vfio.h (limited to 'hw/vfio/container.c') diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 9016720547..f283f7e38d 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -18,14 +18,112 @@ */ #include "qemu/osdep.h" +#include #include #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" #include "hw/pci/pci_device.h" -#include "hw/vfio/vfio.h" +#include "hw/vfio/vfio-common.h" #include "qemu/error-report.h" +/* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +static bool vfio_eeh_container_ok(VFIOContainer *container) +{ + /* + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO + * implementation is broken if there are multiple groups in a + * container. The hardware works in units of Partitionable + * Endpoints (== IOMMU groups) and the EEH operations naively + * iterate across all groups in the container, without any logic + * to make sure the groups have their state synchronized. For + * certain operations (ENABLE) that might be ok, until an error + * occurs, but for others (GET_STATE) it's clearly broken. + */ + + /* + * XXX Once fixed kernels exist, test for them here + */ + + if (QLIST_EMPTY(&container->group_list)) { + return false; + } + + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { + return false; + } + + return true; +} + +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +{ + struct vfio_eeh_pe_op pe_op = { + .argsz = sizeof(pe_op), + .op = op, + }; + int ret; + + if (!vfio_eeh_container_ok(container)) { + error_report("vfio/eeh: EEH_PE_OP 0x%x: " + "kernel requires a container with exactly one group", op); + return -EPERM; + } + + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); + if (ret < 0) { + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); + return -errno; + } + + return ret; +} + +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +{ + VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOContainer *container = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + + container = QLIST_FIRST(&space->containers); + + if (QLIST_NEXT(container, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ + container = NULL; + goto out; + } + +out: + vfio_put_address_space(space); + return container; +} + +static bool vfio_eeh_as_ok(AddressSpace *as) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + return (container != NULL) && vfio_eeh_container_ok(container); +} + +static int vfio_eeh_as_op(AddressSpace *as, uint32_t op) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + if (!container) { + return -ENODEV; + } + return vfio_eeh_container_op(container, op); +} + bool spapr_phb_eeh_available(SpaprPhbState *sphb) { return vfio_eeh_as_ok(&sphb->iommu_as); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 5f257bffb9..bbf69ff55a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -14,7 +14,6 @@ #include #include #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" #include "qemu/error-report.h" diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 6623ae237b..d857bb8d0f 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -20,7 +20,6 @@ #include #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" #include "hw/s390x/vfio-ccw.h" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 9c5c6433f2..e72055e752 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -26,7 +26,6 @@ #include #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/pci.h" #include "exec/address-spaces.h" #include "exec/memory.h" diff --git a/hw/vfio/container.c b/hw/vfio/container.c index fc88222377..83c0f05bba 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -26,7 +26,6 @@ #include #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "exec/address-spaces.h" #include "exec/memory.h" #include "exec/ram_addr.h" @@ -1011,103 +1010,6 @@ static void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } -/* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -static bool vfio_eeh_container_ok(VFIOContainer *container) -{ - /* - * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO - * implementation is broken if there are multiple groups in a - * container. The hardware works in units of Partitionable - * Endpoints (== IOMMU groups) and the EEH operations naively - * iterate across all groups in the container, without any logic - * to make sure the groups have their state synchronized. For - * certain operations (ENABLE) that might be ok, until an error - * occurs, but for others (GET_STATE) it's clearly broken. - */ - - /* - * XXX Once fixed kernels exist, test for them here - */ - - if (QLIST_EMPTY(&container->group_list)) { - return false; - } - - if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { - return false; - } - - return true; -} - -static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) -{ - struct vfio_eeh_pe_op pe_op = { - .argsz = sizeof(pe_op), - .op = op, - }; - int ret; - - if (!vfio_eeh_container_ok(container)) { - error_report("vfio/eeh: EEH_PE_OP 0x%x: " - "kernel requires a container with exactly one group", op); - return -EPERM; - } - - ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); - if (ret < 0) { - error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); - return -errno; - } - - return ret; -} - -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) -{ - VFIOAddressSpace *space = vfio_get_address_space(as); - VFIOContainer *container = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - - container = QLIST_FIRST(&space->containers); - - if (QLIST_NEXT(container, next)) { - /* - * We don't yet have logic to synchronize EEH state across - * multiple containers - */ - container = NULL; - goto out; - } - -out: - vfio_put_address_space(space); - return container; -} - -bool vfio_eeh_as_ok(AddressSpace *as) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - return (container != NULL) && vfio_eeh_container_ok(container); -} - -int vfio_eeh_as_op(AddressSpace *as, uint32_t op) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - if (!container) { - return -ENODEV; - } - return vfio_eeh_container_op(container, op); -} - static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) { char *tmp, group_path[PATH_MAX], *group_name; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 7e5da21b31..168847e7c5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -23,7 +23,6 @@ #include #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/hw.h" #include "trace.h" #include "qapi/error.h" diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h deleted file mode 100644 index 86248f5436..0000000000 --- a/include/hw/vfio/vfio.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef HW_VFIO_H -#define HW_VFIO_H - -bool vfio_eeh_as_ok(AddressSpace *as); -int vfio_eeh_as_op(AddressSpace *as, uint32_t op); - -#endif -- cgit 1.4.1 From 521c8f4ebc5923576436228211f8947f306b9d82 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 2 Nov 2023 15:12:23 +0800 Subject: vfio/container: Move vfio_container_add/del_section_window into spapr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vfio_container_add/del_section_window are spapr specific functions, so move them into spapr.c to make container.c cleaner. No functional changes intended. Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Cédric Le Goater Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 90 ----------------------------------------------------- hw/vfio/spapr.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 90 deletions(-) (limited to 'hw/vfio/container.c') diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 83c0f05bba..7a3f005d1b 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -20,9 +20,6 @@ #include "qemu/osdep.h" #include -#ifdef CONFIG_KVM -#include -#endif #include #include "hw/vfio/vfio-common.h" @@ -32,7 +29,6 @@ #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/range.h" -#include "sysemu/kvm.h" #include "sysemu/reset.h" #include "trace.h" #include "qapi/error.h" @@ -204,92 +200,6 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, return -errno; } -int vfio_container_add_section_window(VFIOContainer *container, - MemoryRegionSection *section, - Error **errp) -{ - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; - - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return 0; - } - - /* For now intersections are not allowed, we may relax this later */ - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, - int128_get64(section->size))) { - error_setg(errp, - "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" - "host DMA window [0x%"PRIx64",0x%"PRIx64"]", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, - hostwin->min_iova, hostwin->max_iova); - return -EINVAL; - } - } - - ret = vfio_spapr_create_window(container, section, &pgsize); - if (ret) { - error_setg_errno(errp, -ret, "Failed to create SPAPR window"); - return ret; - } - - vfio_host_win_add(container, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); -#ifdef CONFIG_KVM - if (kvm_enabled()) { - VFIOGroup *group; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - struct kvm_vfio_spapr_tce param; - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, - .addr = (uint64_t)(unsigned long)¶m, - }; - - if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, - ¶m.tablefd)) { - QLIST_FOREACH(group, &container->group_list, container_next) { - param.groupfd = group->fd; - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_setg_errno(errp, errno, - "vfio: failed GROUP_SET_SPAPR_TCE for " - "KVM VFIO device %d and group fd %d", - param.tablefd, param.groupfd); - return -errno; - } - trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); - } - } - } -#endif - return 0; -} - -void vfio_container_del_section_window(VFIOContainer *container, - MemoryRegionSection *section) -{ - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; - } - - vfio_spapr_remove_window(container, - section->offset_within_address_space); - if (vfio_host_win_del(container, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { - hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, - __func__, section->offset_within_address_space); - } -} - int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) { int ret; diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 9ec1e95f6d..9a7517c042 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -11,6 +11,10 @@ #include "qemu/osdep.h" #include #include +#ifdef CONFIG_KVM +#include +#endif +#include "sysemu/kvm.h" #include "hw/vfio/vfio-common.h" #include "hw/hw.h" @@ -253,3 +257,89 @@ int vfio_spapr_remove_window(VFIOContainer *container, return 0; } + +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp) +{ + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return 0; + } + + /* For now intersections are not allowed, we may relax this later */ + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, + int128_get64(section->size))) { + error_setg(errp, + "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" + "host DMA window [0x%"PRIx64",0x%"PRIx64"]", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, + hostwin->min_iova, hostwin->max_iova); + return -EINVAL; + } + } + + ret = vfio_spapr_create_window(container, section, &pgsize); + if (ret) { + error_setg_errno(errp, -ret, "Failed to create SPAPR window"); + return ret; + } + + vfio_host_win_add(container, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); +#ifdef CONFIG_KVM + if (kvm_enabled()) { + VFIOGroup *group; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + struct kvm_vfio_spapr_tce param; + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, + .addr = (uint64_t)(unsigned long)¶m, + }; + + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, + ¶m.tablefd)) { + QLIST_FOREACH(group, &container->group_list, container_next) { + param.groupfd = group->fd; + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_setg_errno(errp, errno, + "vfio: failed GROUP_SET_SPAPR_TCE for " + "KVM VFIO device %d and group fd %d", + param.tablefd, param.groupfd); + return -errno; + } + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); + } + } + } +#endif + return 0; +} + +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section) +{ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } + + vfio_spapr_remove_window(container, + section->offset_within_address_space); + if (vfio_host_win_del(container, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { + hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, + __func__, section->offset_within_address_space); + } +} -- cgit 1.4.1 From 770c3b6e431e5160b81f889ccb76c07dcfeb5da5 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 2 Nov 2023 15:12:24 +0800 Subject: vfio/container: Move spapr specific init/deinit into spapr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move spapr specific init/deinit code into spapr.c and wrap them with vfio_spapr_container_init/deinit, this way footprint of spapr is further reduced, vfio_prereg_listener could also be made static. vfio_listener_release is unnecessary when prereg_listener is moved out, so have it removed. No functional changes intended. Suggested-by: Cédric Le Goater Signed-off-by: Zhenzhong Duan Reviewed-by: Cédric Le Goater Signed-off-by: Cédric Le Goater --- hw/vfio/container.c | 82 +++++++------------------------------------ hw/vfio/spapr.c | 81 +++++++++++++++++++++++++++++++++++++++++- include/hw/vfio/vfio-common.h | 4 ++- 3 files changed, 95 insertions(+), 72 deletions(-) (limited to 'hw/vfio/container.c') diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 7a3f005d1b..204b244b11 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -264,14 +264,6 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, return ret; } -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - memory_listener_unregister(&container->prereg_listener); - } -} - static struct vfio_info_cap_header * vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) { @@ -612,69 +604,11 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: { - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called - * when container fd is closed so we do not call it explicitly - * in this file. - */ - if (!v2) { - ret = ioctl(fd, VFIO_IOMMU_ENABLE); - if (ret) { - error_setg_errno(errp, errno, "failed to enable container"); - ret = -errno; - goto enable_discards_exit; - } - } else { - container->prereg_listener = vfio_prereg_listener; - - memory_listener_register(&container->prereg_listener, - &address_space_memory); - if (container->error) { - memory_listener_unregister(&container->prereg_listener); - ret = -1; - error_propagate_prepend(errp, container->error, - "RAM memory listener initialization failed: "); - goto enable_discards_exit; - } - } - - info.argsz = sizeof(info); - ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + ret = vfio_spapr_container_init(container, errp); if (ret) { - error_setg_errno(errp, errno, - "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); - ret = -errno; - if (v2) { - memory_listener_unregister(&container->prereg_listener); - } goto enable_discards_exit; } - - if (v2) { - container->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. - * To make region_add/del simpler, we better remove this - * window now and let those iommu_listener callbacks - * create/remove them when needed. - */ - ret = vfio_spapr_remove_window(container, info.dma32_window_start); - if (ret) { - error_setg_errno(errp, -ret, - "failed to remove existing window"); - goto enable_discards_exit; - } - } else { - /* The default table uses 4K pages */ - container->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); - } + break; } } @@ -704,7 +638,11 @@ listener_release_exit: QLIST_REMOVE(group, container_next); QLIST_REMOVE(container, next); vfio_kvm_device_del_group(group); - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } enable_discards_exit: vfio_ram_block_discard_disable(container, false); @@ -734,7 +672,11 @@ static void vfio_disconnect_container(VFIOGroup *group) * group. */ if (QLIST_EMPTY(&container->group_list)) { - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } } if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 9a7517c042..00dbd7af11 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -15,6 +15,7 @@ #include #endif #include "sysemu/kvm.h" +#include "exec/address-spaces.h" #include "hw/vfio/vfio-common.h" #include "hw/hw.h" @@ -139,7 +140,7 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener, trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0); } -const MemoryListener vfio_prereg_listener = { +static const MemoryListener vfio_prereg_listener = { .name = "vfio-pre-reg", .region_add = vfio_prereg_listener_region_add, .region_del = vfio_prereg_listener_region_del, @@ -343,3 +344,81 @@ void vfio_container_del_section_window(VFIOContainer *container, __func__, section->offset_within_address_space); } } + +int vfio_spapr_container_init(VFIOContainer *container, Error **errp) +{ + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + if (!v2) { + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_setg_errno(errp, errno, "failed to enable container"); + return -errno; + } + } else { + container->prereg_listener = vfio_prereg_listener; + + memory_listener_register(&container->prereg_listener, + &address_space_memory); + if (container->error) { + ret = -1; + error_propagate_prepend(errp, container->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } + } + + info.argsz = sizeof(info); + ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, errno, + "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); + ret = -errno; + goto listener_unregister_exit; + } + + if (v2) { + container->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. + * To make region_add/del simpler, we better remove this + * window now and let those iommu_listener callbacks + * create/remove them when needed. + */ + ret = vfio_spapr_remove_window(container, info.dma32_window_start); + if (ret) { + error_setg_errno(errp, -ret, + "failed to remove existing window"); + goto listener_unregister_exit; + } + } else { + /* The default table uses 4K pages */ + container->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); + } + + return 0; + +listener_unregister_exit: + if (v2) { + memory_listener_unregister(&container->prereg_listener); + } + return ret; +} + +void vfio_spapr_container_deinit(VFIOContainer *container) +{ + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } +} diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 0c3d390e8b..ed5a8e4754 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -225,11 +225,14 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, hwaddr iova, hwaddr size); +/* SPAPR specific */ int vfio_container_add_section_window(VFIOContainer *container, MemoryRegionSection *section, Error **errp); void vfio_container_del_section_window(VFIOContainer *container, MemoryRegionSection *section); +int vfio_spapr_container_init(VFIOContainer *container, Error **errp); +void vfio_spapr_container_deinit(VFIOContainer *container); void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); @@ -289,7 +292,6 @@ vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); struct vfio_info_cap_header * vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); #endif -extern const MemoryListener vfio_prereg_listener; int vfio_spapr_create_window(VFIOContainer *container, MemoryRegionSection *section, -- cgit 1.4.1 From a2347c60a86a7c2a227ebab186a195d16e1e3901 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 2 Nov 2023 15:12:26 +0800 Subject: vfio/common: Move vfio_host_win_add/del into spapr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only spapr supports a customed host window list, other vfio driver assume 64bit host window. So remove the check in listener callback and move vfio_host_win_add/del into spapr.c and make it static. With the check removed, we still need to do the same check for VFIO_SPAPR_TCE_IOMMU which allows a single host window range [dma32_window_start, dma32_window_size). Move vfio_find_hostwin into spapr.c and do same check in vfio_container_add_section_window instead. When mapping a ram device section, if it's unaligned with hostwin->iova_pgsizes, this mapping is bypassed. With hostwin moved into spapr, we changed to check container->pgsizes. Suggested-by: Alex Williamson Signed-off-by: Zhenzhong Duan Reviewed-by: Cédric Le Goater Signed-off-by: Cédric Le Goater --- hw/vfio/common.c | 70 ++---------------------------------- hw/vfio/container.c | 16 --------- hw/vfio/spapr.c | 83 +++++++++++++++++++++++++++++++++++++++++++ include/hw/vfio/vfio-common.h | 5 --- 4 files changed, 85 insertions(+), 89 deletions(-) (limited to 'hw/vfio/container.c') diff --git a/hw/vfio/common.c b/hw/vfio/common.c index e72055e752..e70fdf5e0c 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -245,44 +245,6 @@ bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return true; } -void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova, uint64_t iova_pgsizes) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - min_iova, - max_iova - min_iova + 1)) { - hw_error("%s: Overlapped IOMMU are not enabled", __func__); - } - } - - hostwin = g_malloc0(sizeof(*hostwin)); - - hostwin->min_iova = min_iova; - hostwin->max_iova = max_iova; - hostwin->iova_pgsizes = iova_pgsizes; - QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); -} - -int vfio_host_win_del(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - return 0; - } - } - - return -1; -} - static bool vfio_listener_skipped_section(MemoryRegionSection *section) { return (!memory_region_is_ram(section->mr) && @@ -531,22 +493,6 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, g_free(vrdl); } -static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, - hwaddr iova, hwaddr end) -{ - VFIOHostDMAWindow *hostwin; - bool hostwin_found = false; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { - hostwin_found = true; - break; - } - } - - return hostwin_found ? hostwin : NULL; -} - static bool vfio_known_safe_misalignment(MemoryRegionSection *section) { MemoryRegion *mr = section->mr; @@ -625,7 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, Int128 llend, llsize; void *vaddr; int ret; - VFIOHostDMAWindow *hostwin; Error *err = NULL; if (!vfio_listener_valid_section(section, "region_add")) { @@ -647,13 +592,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - hostwin = vfio_find_hostwin(container, iova, end); - if (!hostwin) { - error_setg(&err, "Container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end); - goto fail; - } - memory_region_ref(section->mr); if (memory_region_is_iommu(section->mr)) { @@ -734,7 +672,7 @@ static void vfio_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { trace_vfio_listener_region_add_no_dma_map( @@ -833,12 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, if (memory_region_is_ram_device(section->mr)) { hwaddr pgmask; - VFIOHostDMAWindow *hostwin; - - hostwin = vfio_find_hostwin(container, iova, end); - assert(hostwin); /* or region_add() would have failed */ - pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + pgmask = (1ULL << ctz64(container->pgsizes)) - 1; try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); } else if (memory_region_has_ram_discard_manager(section->mr)) { vfio_unregister_ram_discard_listener(container, section); diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 204b244b11..242010036a 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -551,7 +551,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->dma_max_mappings = 0; container->iova_ranges = NULL; QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); ret = vfio_init_container(container, group->fd, errp); @@ -591,14 +590,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_get_iommu_info_migration(container, info); g_free(info); - - /* - * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE - * information to get the actual window extent rather than assume - * a 64-bit IOVA address space. - */ - vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes); - break; } case VFIO_SPAPR_TCE_v2_IOMMU: @@ -687,7 +678,6 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; - VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -698,12 +688,6 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - } - trace_vfio_disconnect_container(container->fd); close(container->fd); vfio_free_container(container); diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 4428990c28..83da2f7ec2 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -146,6 +146,60 @@ static const MemoryListener vfio_prereg_listener = { .region_del = vfio_prereg_listener_region_del, }; +static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + min_iova, + max_iova - min_iova + 1)) { + hw_error("%s: Overlapped IOMMU are not enabled", __func__); + } + } + + hostwin = g_malloc0(sizeof(*hostwin)); + + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; + QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); +} + +static int vfio_host_win_del(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + return 0; + } + } + + return -1; +} + +static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, + hwaddr iova, hwaddr end) +{ + VFIOHostDMAWindow *hostwin; + bool hostwin_found = false; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { + hostwin_found = true; + break; + } + } + + return hostwin_found ? hostwin : NULL; +} + static int vfio_spapr_remove_window(VFIOContainer *container, hwaddr offset_within_address_space) { @@ -267,6 +321,26 @@ int vfio_container_add_section_window(VFIOContainer *container, hwaddr pgsize = 0; int ret; + /* + * VFIO_SPAPR_TCE_IOMMU supports a single host window between + * [dma32_window_start, dma32_window_size), we need to ensure + * the section fall in this range. + */ + if (container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + hwaddr iova, end; + + iova = section->offset_within_address_space; + end = iova + int128_get64(section->size) - 1; + + if (!vfio_find_hostwin(container, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); + return -EINVAL; + } + return 0; + } + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { return 0; } @@ -351,6 +425,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; int ret, fd = container->fd; + QLIST_INIT(&container->hostwin_list); + /* * The host kernel code implementing VFIO_IOMMU_DISABLE is called * when container fd is closed so we do not call it explicitly @@ -418,7 +494,14 @@ listener_unregister_exit: void vfio_spapr_container_deinit(VFIOContainer *container) { + VFIOHostDMAWindow *hostwin, *next; + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { memory_listener_unregister(&container->prereg_listener); } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } } diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 87848982bd..a4a22accb9 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -207,11 +207,6 @@ typedef struct { hwaddr pages; } VFIOBitmap; -void vfio_host_win_add(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova, - uint64_t iova_pgsizes); -int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova); VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); void vfio_put_address_space(VFIOAddressSpace *space); bool vfio_devices_all_running_and_saving(VFIOContainer *container); -- cgit 1.4.1