summary refs log tree commit diff stats
path: root/hw/vfio/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio/common.c')
-rw-r--r--hw/vfio/common.c51
1 files changed, 51 insertions, 0 deletions
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index fb396cf00a..cd1f4af18a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -32,6 +32,7 @@
 #include "hw/hw.h"
 #include "qemu/error-report.h"
 #include "qemu/range.h"
+#include "sysemu/balloon.h"
 #include "sysemu/kvm.h"
 #include "trace.h"
 #include "qapi/error.h"
@@ -1044,6 +1045,33 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
 
     space = vfio_get_address_space(as);
 
+    /*
+     * VFIO is currently incompatible with memory ballooning insofar as the
+     * madvise to purge (zap) the page from QEMU's address space does not
+     * interact with the memory API and therefore leaves stale virtual to
+     * physical mappings in the IOMMU if the page was previously pinned.  We
+     * therefore add a balloon inhibit for each group added to a container,
+     * whether the container is used individually or shared.  This provides
+     * us with options to allow devices within a group to opt-in and allow
+     * ballooning, so long as it is done consistently for a group (for instance
+     * if the device is an mdev device where it is known that the host vendor
+     * driver will never pin pages outside of the working set of the guest
+     * driver, which would thus not be ballooning candidates).
+     *
+     * The first opportunity to induce pinning occurs here where we attempt to
+     * attach the group to existing containers within the AddressSpace.  If any
+     * pages are already zapped from the virtual address space, such as from a
+     * previous ballooning opt-in, new pinning will cause valid mappings to be
+     * re-established.  Likewise, when the overall MemoryListener for a new
+     * container is registered, a replay of mappings within the AddressSpace
+     * will occur, re-establishing any previously zapped pages as well.
+     *
+     * NB. Balloon inhibiting does not currently block operation of the
+     * balloon driver or revoke previously pinned pages, it only prevents
+     * calling madvise to modify the virtual mapping of ballooned pages.
+     */
+    qemu_balloon_inhibit(true);
+
     QLIST_FOREACH(container, &space->containers, next) {
         if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
             group->container = container;
@@ -1232,6 +1260,7 @@ close_fd_exit:
     close(fd);
 
 put_space_exit:
+    qemu_balloon_inhibit(false);
     vfio_put_address_space(space);
 
     return ret;
@@ -1352,6 +1381,9 @@ void vfio_put_group(VFIOGroup *group)
         return;
     }
 
+    if (!group->balloon_allowed) {
+        qemu_balloon_inhibit(false);
+    }
     vfio_kvm_device_del_group(group);
     vfio_disconnect_container(group);
     QLIST_REMOVE(group, next);
@@ -1387,6 +1419,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
         return ret;
     }
 
+    /*
+     * Clear the balloon inhibitor for this group if the driver knows the
+     * device operates compatibly with ballooning.  Setting must be consistent
+     * per group, but since compatibility is really only possible with mdev
+     * currently, we expect singleton groups.
+     */
+    if (vbasedev->balloon_allowed != group->balloon_allowed) {
+        if (!QLIST_EMPTY(&group->device_list)) {
+            error_setg(errp,
+                       "Inconsistent device balloon setting within group");
+            return -1;
+        }
+
+        if (!group->balloon_allowed) {
+            group->balloon_allowed = true;
+            qemu_balloon_inhibit(false);
+        }
+    }
+
     vbasedev->fd = fd;
     vbasedev->group = group;
     QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);