diff options
| author | Steve Sistare <steven.sistare@oracle.com> | 2025-06-10 08:39:17 -0700 |
|---|---|---|
| committer | Cédric Le Goater <clg@redhat.com> | 2025-06-11 14:01:58 +0200 |
| commit | c29a65ed68535288551d2bf70ae32f3df371a57d (patch) | |
| tree | 59548081eb53b45ba7353e06d6c5f40e0a17d92c /hw/vfio/container.c | |
| parent | 54857b08168a0a74711d1f773c16a7122499027b (diff) | |
| download | focaccia-qemu-c29a65ed68535288551d2bf70ae32f3df371a57d.tar.gz focaccia-qemu-c29a65ed68535288551d2bf70ae32f3df371a57d.zip | |
vfio/container: preserve descriptors
At vfio creation time, save the value of vfio container, group, and device descriptors in CPR state. On qemu restart, vfio_realize() finds and uses the saved descriptors. During reuse, device and iommu state is already configured, so operations in vfio_realize that would modify the configuration, such as vfio ioctl's, are skipped. The result is that vfio_realize constructs qemu data structures that reflect the current state of the device. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> Reviewed-by: Cédric Le Goater <clg@redhat.com> Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com> Link: https://lore.kernel.org/qemu-devel/1749569991-25171-5-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater <clg@redhat.com>
Diffstat (limited to 'hw/vfio/container.c')
| -rw-r--r-- | hw/vfio/container.c | 67 |
1 files changed, 52 insertions, 15 deletions
diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 93cdf80e13..5caae4ccae 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -31,6 +31,8 @@ #include "system/reset.h" #include "trace.h" #include "qapi/error.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "pci.h" #include "hw/vfio/vfio-container.h" #include "vfio-helpers.h" @@ -425,7 +427,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, return NULL; } - if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + /* + * During CPR, just set the container type and skip the ioctls, as the + * container and group are already configured in the kernel. + */ + if (!cpr_is_incoming() && + !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { return NULL; } @@ -592,6 +599,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); vfio_group_add_kvm_device(group); + /* + * Remember the container fd for each group, so we can attach to the same + * container after CPR. + */ + cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd); return true; } @@ -601,6 +613,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) group->container = NULL; vfio_group_del_kvm_device(group); vfio_ram_block_discard_disable(container, false); + cpr_delete_fd("vfio_container_for_group", group->groupid); } static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, @@ -615,17 +628,34 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, bool group_was_added = false; space = vfio_address_space_get(as); + fd = cpr_find_fd("vfio_container_for_group", group->groupid); - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOContainer, bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - return vfio_container_group_add(container, group, errp); + if (!cpr_is_incoming()) { + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + return vfio_container_group_add(container, group, errp); + } } - } - fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); - if (fd < 0) { - goto fail; + fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); + if (fd < 0) { + goto fail; + } + } else { + /* + * For incoming CPR, the group is already attached in the kernel. + * If a container with matching fd is found, then update the + * userland group list and return. If not, then after the loop, + * create the container struct and group list. + */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + + if (vfio_cpr_container_match(container, group, fd)) { + return vfio_container_group_add(container, group, errp); + } + } } ret = ioctl(fd, VFIO_GET_API_VERSION); @@ -697,6 +727,7 @@ static void vfio_container_disconnect(VFIOGroup *group) QLIST_REMOVE(group, container_next); group->container = NULL; + cpr_delete_fd("vfio_container_for_group", group->groupid); /* * Explicitly release the listener first before unset container, @@ -750,7 +781,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) group = g_malloc0(sizeof(*group)); snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open(path, O_RDWR, errp); + group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp); if (group->fd < 0) { goto free_group_exit; } @@ -782,6 +813,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) return group; close_fd_exit: + cpr_delete_fd("vfio_group", groupid); close(group->fd); free_group_exit: @@ -803,6 +835,7 @@ static void vfio_group_put(VFIOGroup *group) vfio_container_disconnect(group); QLIST_REMOVE(group, next); trace_vfio_group_put(group->fd); + cpr_delete_fd("vfio_group", group->groupid); close(group->fd); g_free(group); } @@ -813,7 +846,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, g_autofree struct vfio_device_info *info = NULL; int fd; - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + fd = vfio_cpr_group_get_device_fd(group->fd, name); if (fd < 0) { error_setg_errno(errp, errno, "error getting device from group %d", group->groupid); @@ -826,8 +859,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, info = vfio_get_device_info(fd); if (!info) { error_setg_errno(errp, errno, "error getting device info"); - close(fd); - return false; + goto fail; } /* @@ -841,8 +873,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, if (!QLIST_EMPTY(&group->device_list)) { error_setg(errp, "Inconsistent setting of support for discarding " "RAM (e.g., balloon) within group"); - close(fd); - return false; + goto fail; } if (!group->ram_block_discard_allowed) { @@ -860,6 +891,11 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); return true; + +fail: + close(fd); + cpr_delete_fd(name, 0); + return false; } static void vfio_device_put(VFIODevice *vbasedev) @@ -870,6 +906,7 @@ static void vfio_device_put(VFIODevice *vbasedev) QLIST_REMOVE(vbasedev, next); vbasedev->group = NULL; trace_vfio_device_put(vbasedev->fd); + cpr_delete_fd(vbasedev->name, 0); close(vbasedev->fd); } |