diff options
Diffstat (limited to 'hw/vfio/common.c')
| -rw-r--r-- | hw/vfio/common.c | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index fb396cf00a..7c185e5a2e 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -32,6 +32,7 @@ #include "hw/hw.h" #include "qemu/error-report.h" #include "qemu/range.h" +#include "sysemu/balloon.h" #include "sysemu/kvm.h" #include "trace.h" #include "qapi/error.h" @@ -1044,6 +1045,33 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, space = vfio_get_address_space(as); + /* + * VFIO is currently incompatible with memory ballooning insofar as the + * madvise to purge (zap) the page from QEMU's address space does not + * interact with the memory API and therefore leaves stale virtual to + * physical mappings in the IOMMU if the page was previously pinned. We + * therefore add a balloon inhibit for each group added to a container, + * whether the container is used individually or shared. This provides + * us with options to allow devices within a group to opt-in and allow + * ballooning, so long as it is done consistently for a group (for instance + * if the device is an mdev device where it is known that the host vendor + * driver will never pin pages outside of the working set of the guest + * driver, which would thus not be ballooning candidates). + * + * The first opportunity to induce pinning occurs here where we attempt to + * attach the group to existing containers within the AddressSpace. If any + * pages are already zapped from the virtual address space, such as from a + * previous ballooning opt-in, new pinning will cause valid mappings to be + * re-established. Likewise, when the overall MemoryListener for a new + * container is registered, a replay of mappings within the AddressSpace + * will occur, re-establishing any previously zapped pages as well. + * + * NB. Balloon inhibiting does not currently block operation of the + * balloon driver or revoke previously pinned pages, it only prevents + * calling madvise to modify the virtual mapping of ballooned pages. + */ + qemu_balloon_inhibit(true); + QLIST_FOREACH(container, &space->containers, next) { if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { group->container = container; @@ -1108,6 +1136,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, info.iova_pgsizes = 4096; } vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); + container->pgsizes = info.iova_pgsizes; } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) || ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) { struct vfio_iommu_spapr_tce_info info; @@ -1172,6 +1201,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, } if (v2) { + container->pgsizes = info.ddw.pgsizes; /* * There is a default window in just created container. * To make region_add/del simpler, we better remove this @@ -1186,6 +1216,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, } } else { /* The default table uses 4K pages */ + container->pgsizes = 0x1000; vfio_host_win_add(container, info.dma32_window_start, info.dma32_window_start + info.dma32_window_size - 1, @@ -1232,6 +1263,7 @@ close_fd_exit: close(fd); put_space_exit: + qemu_balloon_inhibit(false); vfio_put_address_space(space); return ret; @@ -1352,6 +1384,9 @@ void vfio_put_group(VFIOGroup *group) return; } + if (!group->balloon_allowed) { + qemu_balloon_inhibit(false); + } vfio_kvm_device_del_group(group); vfio_disconnect_container(group); QLIST_REMOVE(group, next); @@ -1387,6 +1422,26 @@ int vfio_get_device(VFIOGroup *group, const char *name, return ret; } + /* + * Clear the balloon inhibitor for this group if the driver knows the + * device operates compatibly with ballooning. Setting must be consistent + * per group, but since compatibility is really only possible with mdev + * currently, we expect singleton groups. + */ + if (vbasedev->balloon_allowed != group->balloon_allowed) { + if (!QLIST_EMPTY(&group->device_list)) { + error_setg(errp, + "Inconsistent device balloon setting within group"); + close(fd); + return -1; + } + + if (!group->balloon_allowed) { + group->balloon_allowed = true; + qemu_balloon_inhibit(false); + } + } + vbasedev->fd = fd; vbasedev->group = group; QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); |