diff options
Diffstat (limited to 'hw/vfio')
| -rw-r--r-- | hw/vfio/ap.c | 1 | ||||
| -rw-r--r-- | hw/vfio/ccw.c | 1 | ||||
| -rw-r--r-- | hw/vfio/common.c | 80 | ||||
| -rw-r--r-- | hw/vfio/container.c | 328 | ||||
| -rw-r--r-- | hw/vfio/helpers.c | 1 | ||||
| -rw-r--r-- | hw/vfio/pci.c | 2 | ||||
| -rw-r--r-- | hw/vfio/spapr.c | 282 |
7 files changed, 330 insertions, 365 deletions
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 5f257bffb9..bbf69ff55a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -14,7 +14,6 @@ #include <linux/vfio.h> #include <sys/ioctl.h> #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" #include "qemu/error-report.h" diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 6623ae237b..d857bb8d0f 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -20,7 +20,6 @@ #include <sys/ioctl.h> #include "qapi/error.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" #include "hw/s390x/vfio-ccw.h" diff --git a/hw/vfio/common.c b/hw/vfio/common.c index d806057b40..e70fdf5e0c 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -26,7 +26,6 @@ #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/vfio/pci.h" #include "exec/address-spaces.h" #include "exec/memory.h" @@ -246,44 +245,6 @@ bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) return true; } -void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova, uint64_t iova_pgsizes) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - min_iova, - max_iova - min_iova + 1)) { - hw_error("%s: Overlapped IOMMU are not enabled", __func__); - } - } - - hostwin = g_malloc0(sizeof(*hostwin)); - - hostwin->min_iova = min_iova; - hostwin->max_iova = max_iova; - hostwin->iova_pgsizes = iova_pgsizes; - QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); -} - -int vfio_host_win_del(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova) -{ - VFIOHostDMAWindow *hostwin; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - return 0; - } - } - - return -1; -} - static bool vfio_listener_skipped_section(MemoryRegionSection *section) { return (!memory_region_is_ram(section->mr) && @@ -532,22 +493,6 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, g_free(vrdl); } -static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, - hwaddr iova, hwaddr end) -{ - VFIOHostDMAWindow *hostwin; - bool hostwin_found = false; - - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { - hostwin_found = true; - break; - } - } - - return hostwin_found ? hostwin : NULL; -} - static bool vfio_known_safe_misalignment(MemoryRegionSection *section) { MemoryRegion *mr = section->mr; @@ -626,7 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, Int128 llend, llsize; void *vaddr; int ret; - VFIOHostDMAWindow *hostwin; Error *err = NULL; if (!vfio_listener_valid_section(section, "region_add")) { @@ -648,13 +592,6 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } - hostwin = vfio_find_hostwin(container, iova, end); - if (!hostwin) { - error_setg(&err, "Container %p can't map guest IOVA region" - " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end); - goto fail; - } - memory_region_ref(section->mr); if (memory_region_is_iommu(section->mr)) { @@ -693,6 +630,15 @@ static void vfio_listener_region_add(MemoryListener *listener, goto fail; } + if (container->iova_ranges) { + ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, + container->iova_ranges, &err); + if (ret) { + g_free(giommu); + goto fail; + } + } + ret = memory_region_register_iommu_notifier(section->mr, &giommu->n, &err); if (ret) { @@ -726,7 +672,7 @@ static void vfio_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (memory_region_is_ram_device(section->mr)) { - hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { trace_vfio_listener_region_add_no_dma_map( @@ -825,12 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, if (memory_region_is_ram_device(section->mr)) { hwaddr pgmask; - VFIOHostDMAWindow *hostwin; - - hostwin = vfio_find_hostwin(container, iova, end); - assert(hostwin); /* or region_add() would have failed */ - pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + pgmask = (1ULL << ctz64(container->pgsizes)) - 1; try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); } else if (memory_region_has_ram_discard_manager(section->mr)) { vfio_unregister_ram_discard_listener(container, section); diff --git a/hw/vfio/container.c b/hw/vfio/container.c index adc467210f..242010036a 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -20,20 +20,15 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> -#ifdef CONFIG_KVM -#include <linux/kvm.h> -#endif #include <linux/vfio.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "exec/address-spaces.h" #include "exec/memory.h" #include "exec/ram_addr.h" #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/range.h" -#include "sysemu/kvm.h" #include "sysemu/reset.h" #include "trace.h" #include "qapi/error.h" @@ -205,92 +200,6 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, return -errno; } -int vfio_container_add_section_window(VFIOContainer *container, - MemoryRegionSection *section, - Error **errp) -{ - VFIOHostDMAWindow *hostwin; - hwaddr pgsize = 0; - int ret; - - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return 0; - } - - /* For now intersections are not allowed, we may relax this later */ - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, - int128_get64(section->size))) { - error_setg(errp, - "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" - "host DMA window [0x%"PRIx64",0x%"PRIx64"]", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, - hostwin->min_iova, hostwin->max_iova); - return -EINVAL; - } - } - - ret = vfio_spapr_create_window(container, section, &pgsize); - if (ret) { - error_setg_errno(errp, -ret, "Failed to create SPAPR window"); - return ret; - } - - vfio_host_win_add(container, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); -#ifdef CONFIG_KVM - if (kvm_enabled()) { - VFIOGroup *group; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - struct kvm_vfio_spapr_tce param; - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, - .addr = (uint64_t)(unsigned long)¶m, - }; - - if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, - ¶m.tablefd)) { - QLIST_FOREACH(group, &container->group_list, container_next) { - param.groupfd = group->fd; - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_setg_errno(errp, errno, - "vfio: failed GROUP_SET_SPAPR_TCE for " - "KVM VFIO device %d and group fd %d", - param.tablefd, param.groupfd); - return -errno; - } - trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); - } - } - } -#endif - return 0; -} - -void vfio_container_del_section_window(VFIOContainer *container, - MemoryRegionSection *section) -{ - if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { - return; - } - - vfio_spapr_remove_window(container, - section->offset_within_address_space); - if (vfio_host_win_del(container, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { - hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, - __func__, section->offset_within_address_space); - } -} - int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) { int ret; @@ -355,14 +264,6 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, return ret; } -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - memory_listener_unregister(&container->prereg_listener); - } -} - static struct vfio_info_cap_header * vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) { @@ -382,7 +283,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, /* If the capability cannot be found, assume no DMA limiting */ hdr = vfio_get_iommu_type1_info_cap(info, VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); - if (hdr == NULL) { + if (!hdr) { return false; } @@ -394,6 +295,32 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, return true; } +static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + VFIOContainer *container) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; + + hdr = vfio_get_iommu_type1_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + if (!hdr) { + return false; + } + + cap = (void *)hdr; + + for (int i = 0; i < cap->nr_iovas; i++) { + Range *range = g_new(Range, 1); + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); + container->iova_ranges = + range_list_insert(container->iova_ranges, range); + } + + return true; +} + static void vfio_kvm_device_add_group(VFIOGroup *group) { Error *err = NULL; @@ -535,6 +462,12 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, } } +static void vfio_free_container(VFIOContainer *container) +{ + g_list_free_full(container->iova_ranges, g_free); + g_free(container); +} + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, Error **errp) { @@ -616,8 +549,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->error = NULL; container->dirty_pages_supported = false; container->dma_max_mappings = 0; + container->iova_ranges = NULL; QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); ret = vfio_init_container(container, group->fd, errp); @@ -652,84 +585,21 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { container->dma_max_mappings = 65535; } - vfio_get_iommu_info_migration(container, info); - g_free(info); - /* - * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE - * information to get the actual window extent rather than assume - * a 64-bit IOVA address space. - */ - vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes); + vfio_get_info_iova_range(info, container); + vfio_get_iommu_info_migration(container, info); + g_free(info); break; } case VFIO_SPAPR_TCE_v2_IOMMU: case VFIO_SPAPR_TCE_IOMMU: { - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called - * when container fd is closed so we do not call it explicitly - * in this file. - */ - if (!v2) { - ret = ioctl(fd, VFIO_IOMMU_ENABLE); - if (ret) { - error_setg_errno(errp, errno, "failed to enable container"); - ret = -errno; - goto enable_discards_exit; - } - } else { - container->prereg_listener = vfio_prereg_listener; - - memory_listener_register(&container->prereg_listener, - &address_space_memory); - if (container->error) { - memory_listener_unregister(&container->prereg_listener); - ret = -1; - error_propagate_prepend(errp, container->error, - "RAM memory listener initialization failed: "); - goto enable_discards_exit; - } - } - - info.argsz = sizeof(info); - ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + ret = vfio_spapr_container_init(container, errp); if (ret) { - error_setg_errno(errp, errno, - "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); - ret = -errno; - if (v2) { - memory_listener_unregister(&container->prereg_listener); - } goto enable_discards_exit; } - - if (v2) { - container->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. - * To make region_add/del simpler, we better remove this - * window now and let those iommu_listener callbacks - * create/remove them when needed. - */ - ret = vfio_spapr_remove_window(container, info.dma32_window_start); - if (ret) { - error_setg_errno(errp, -ret, - "failed to remove existing window"); - goto enable_discards_exit; - } - } else { - /* The default table uses 4K pages */ - container->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); - } + break; } } @@ -759,13 +629,17 @@ listener_release_exit: QLIST_REMOVE(group, container_next); QLIST_REMOVE(container, next); vfio_kvm_device_del_group(group); - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } enable_discards_exit: vfio_ram_block_discard_disable(container, false); free_container_exit: - g_free(container); + vfio_free_container(container); close_fd_exit: close(fd); @@ -789,7 +663,11 @@ static void vfio_disconnect_container(VFIOGroup *group) * group. */ if (QLIST_EMPTY(&container->group_list)) { - vfio_listener_release(container); + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); + } } if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { @@ -800,7 +678,6 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; - VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -811,15 +688,9 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - } - trace_vfio_disconnect_container(container->fd); close(container->fd); - g_free(container); + vfio_free_container(container); vfio_put_address_space(space); } @@ -975,103 +846,6 @@ static void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } -/* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -static bool vfio_eeh_container_ok(VFIOContainer *container) -{ - /* - * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO - * implementation is broken if there are multiple groups in a - * container. The hardware works in units of Partitionable - * Endpoints (== IOMMU groups) and the EEH operations naively - * iterate across all groups in the container, without any logic - * to make sure the groups have their state synchronized. For - * certain operations (ENABLE) that might be ok, until an error - * occurs, but for others (GET_STATE) it's clearly broken. - */ - - /* - * XXX Once fixed kernels exist, test for them here - */ - - if (QLIST_EMPTY(&container->group_list)) { - return false; - } - - if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { - return false; - } - - return true; -} - -static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) -{ - struct vfio_eeh_pe_op pe_op = { - .argsz = sizeof(pe_op), - .op = op, - }; - int ret; - - if (!vfio_eeh_container_ok(container)) { - error_report("vfio/eeh: EEH_PE_OP 0x%x: " - "kernel requires a container with exactly one group", op); - return -EPERM; - } - - ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); - if (ret < 0) { - error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); - return -errno; - } - - return ret; -} - -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) -{ - VFIOAddressSpace *space = vfio_get_address_space(as); - VFIOContainer *container = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - - container = QLIST_FIRST(&space->containers); - - if (QLIST_NEXT(container, next)) { - /* - * We don't yet have logic to synchronize EEH state across - * multiple containers - */ - container = NULL; - goto out; - } - -out: - vfio_put_address_space(space); - return container; -} - -bool vfio_eeh_as_ok(AddressSpace *as) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - return (container != NULL) && vfio_eeh_container_ok(container); -} - -int vfio_eeh_as_op(AddressSpace *as, uint32_t op) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - if (!container) { - return -ENODEV; - } - return vfio_eeh_container_op(container, op); -} - static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) { char *tmp, group_path[PATH_MAX], *group_name; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 7e5da21b31..168847e7c5 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -23,7 +23,6 @@ #include <sys/ioctl.h> #include "hw/vfio/vfio-common.h" -#include "hw/vfio/vfio.h" #include "hw/hw.h" #include "trace.h" #include "qapi/error.h" diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index b27011cee7..c62c02f7b6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3081,7 +3081,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) struct stat st; int i, ret; bool is_mdev; - char uuid[UUID_FMT_LEN]; + char uuid[UUID_STR_LEN]; char *name; if (!vbasedev->sysfsdev) { diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 9ec1e95f6d..83da2f7ec2 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -11,6 +11,11 @@ #include "qemu/osdep.h" #include <sys/ioctl.h> #include <linux/vfio.h> +#ifdef CONFIG_KVM +#include <linux/kvm.h> +#endif +#include "sysemu/kvm.h" +#include "exec/address-spaces.h" #include "hw/vfio/vfio-common.h" #include "hw/hw.h" @@ -135,15 +140,90 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener, trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0); } -const MemoryListener vfio_prereg_listener = { +static const MemoryListener vfio_prereg_listener = { .name = "vfio-pre-reg", .region_add = vfio_prereg_listener_region_add, .region_del = vfio_prereg_listener_region_del, }; -int vfio_spapr_create_window(VFIOContainer *container, - MemoryRegionSection *section, - hwaddr *pgsize) +static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + min_iova, + max_iova - min_iova + 1)) { + hw_error("%s: Overlapped IOMMU are not enabled", __func__); + } + } + + hostwin = g_malloc0(sizeof(*hostwin)); + + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; + QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); +} + +static int vfio_host_win_del(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova) +{ + VFIOHostDMAWindow *hostwin; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + return 0; + } + } + + return -1; +} + +static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, + hwaddr iova, hwaddr end) +{ + VFIOHostDMAWindow *hostwin; + bool hostwin_found = false; + + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { + hostwin_found = true; + break; + } + } + + return hostwin_found ? hostwin : NULL; +} + +static int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space) +{ + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + .start_addr = offset_within_address_space, + }; + int ret; + + ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + if (ret) { + error_report("Failed to remove window at %"PRIx64, + (uint64_t)remove.start_addr); + return -errno; + } + + trace_vfio_spapr_remove_window(offset_within_address_space); + + return 0; +} + +static int vfio_spapr_create_window(VFIOContainer *container, + MemoryRegionSection *section, + hwaddr *pgsize) { int ret = 0; IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); @@ -233,23 +313,195 @@ int vfio_spapr_create_window(VFIOContainer *container, return 0; } -int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space) +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp) { - struct vfio_iommu_spapr_tce_remove remove = { - .argsz = sizeof(remove), - .start_addr = offset_within_address_space, - }; + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; int ret; - ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + /* + * VFIO_SPAPR_TCE_IOMMU supports a single host window between + * [dma32_window_start, dma32_window_size), we need to ensure + * the section fall in this range. + */ + if (container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + hwaddr iova, end; + + iova = section->offset_within_address_space; + end = iova + int128_get64(section->size) - 1; + + if (!vfio_find_hostwin(container, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); + return -EINVAL; + } + return 0; + } + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return 0; + } + + /* For now intersections are not allowed, we may relax this later */ + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, + int128_get64(section->size))) { + error_setg(errp, + "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" + "host DMA window [0x%"PRIx64",0x%"PRIx64"]", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, + hostwin->min_iova, hostwin->max_iova); + return -EINVAL; + } + } + + ret = vfio_spapr_create_window(container, section, &pgsize); if (ret) { - error_report("Failed to remove window at %"PRIx64, - (uint64_t)remove.start_addr); - return -errno; + error_setg_errno(errp, -ret, "Failed to create SPAPR window"); + return ret; } - trace_vfio_spapr_remove_window(offset_within_address_space); + vfio_host_win_add(container, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); +#ifdef CONFIG_KVM + if (kvm_enabled()) { + VFIOGroup *group; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + struct kvm_vfio_spapr_tce param; + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, + .addr = (uint64_t)(unsigned long)¶m, + }; + + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, + ¶m.tablefd)) { + QLIST_FOREACH(group, &container->group_list, container_next) { + param.groupfd = group->fd; + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_setg_errno(errp, errno, + "vfio: failed GROUP_SET_SPAPR_TCE for " + "KVM VFIO device %d and group fd %d", + param.tablefd, param.groupfd); + return -errno; + } + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); + } + } + } +#endif + return 0; +} + +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section) +{ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } + + vfio_spapr_remove_window(container, + section->offset_within_address_space); + if (vfio_host_win_del(container, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { + hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, + __func__, section->offset_within_address_space); + } +} + +int vfio_spapr_container_init(VFIOContainer *container, Error **errp) +{ + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + + QLIST_INIT(&container->hostwin_list); + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. + */ + if (!v2) { + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_setg_errno(errp, errno, "failed to enable container"); + return -errno; + } + } else { + container->prereg_listener = vfio_prereg_listener; + + memory_listener_register(&container->prereg_listener, + &address_space_memory); + if (container->error) { + ret = -1; + error_propagate_prepend(errp, container->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } + } + + info.argsz = sizeof(info); + ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, errno, + "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); + ret = -errno; + goto listener_unregister_exit; + } + + if (v2) { + container->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. + * To make region_add/del simpler, we better remove this + * window now and let those iommu_listener callbacks + * create/remove them when needed. + */ + ret = vfio_spapr_remove_window(container, info.dma32_window_start); + if (ret) { + error_setg_errno(errp, -ret, + "failed to remove existing window"); + goto listener_unregister_exit; + } + } else { + /* The default table uses 4K pages */ + container->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); + } return 0; + +listener_unregister_exit: + if (v2) { + memory_listener_unregister(&container->prereg_listener); + } + return ret; +} + +void vfio_spapr_container_deinit(VFIOContainer *container) +{ + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } } |