From e563dc88c21915e111ecef0756cc291e9e473c35 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:42 -0700 Subject: backends/iommufd: iommufd_backend_map_file_dma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define iommufd_backend_map_file_dma to implement IOMMU_IOAS_MAP_FILE. This will be called as a substitute for iommufd_backend_map_dma, so the error conditions for BARs are copied as-is from that function. Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-6-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'backends/iommufd.c') diff --git a/backends/iommufd.c b/backends/iommufd.c index c2c47abf7e..3a2ecc7f5b 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -172,6 +172,40 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, return ret; } +int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, + hwaddr iova, ram_addr_t size, + int mfd, unsigned long start, bool readonly) +{ + int ret, fd = be->fd; + struct iommu_ioas_map_file map = { + .size = sizeof(map), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .ioas_id = ioas_id, + .fd = mfd, + .start = start, + .iova = iova, + .length = size, + }; + + if (!readonly) { + map.flags |= IOMMU_IOAS_MAP_WRITEABLE; + } + + ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &map); + trace_iommufd_backend_map_file_dma(fd, ioas_id, iova, size, mfd, start, + readonly, ret); + if (ret) { + ret = -errno; + + /* TODO: Not support mapping hardware PCI BAR region for now. 
*/ + if (errno == EFAULT) { + warn_report("IOMMU_IOAS_MAP_FILE failed: %m, PCI BAR?"); + } + } + return ret; +} + int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ram_addr_t size) { -- cgit 1.4.1 From ab48cedc648a60a3e51db73acf12148d90f19c4c Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:43 -0700 Subject: backends/iommufd: change process ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the change process ioctl Signed-off-by: Steve Sistare Reviewed-by: Cédric Le Goater Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-7-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 24 ++++++++++++++++++++++++ backends/trace-events | 1 + include/system/iommufd.h | 3 +++ 3 files changed, 28 insertions(+) (limited to 'backends/iommufd.c') diff --git a/backends/iommufd.c b/backends/iommufd.c index 3a2ecc7f5b..87f81a05f6 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -73,6 +73,30 @@ static void iommufd_backend_class_init(ObjectClass *oc, const void *data) object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); } +bool iommufd_change_process_capable(IOMMUFDBackend *be) +{ + struct iommu_ioas_change_process args = {.size = sizeof(args)}; + + /* + * Call IOMMU_IOAS_CHANGE_PROCESS to verify it is a recognized ioctl. + * This is a no-op if the process has not changed since DMA was mapped. 
+ */ + return !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); +} + +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp) +{ + struct iommu_ioas_change_process args = {.size = sizeof(args)}; + bool ret = !ioctl(be->fd, IOMMU_IOAS_CHANGE_PROCESS, &args); + + if (!ret) { + error_setg_errno(errp, errno, "IOMMU_IOAS_CHANGE_PROCESS fd %d failed", + be->fd); + } + trace_iommufd_change_process(be->fd, ret); + return ret; +} + bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) { int fd; diff --git a/backends/trace-events b/backends/trace-events index e5f3e70cd1..56132d3fd2 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -7,6 +7,7 @@ dbus_vmstate_loading(const char *id) "id: %s" dbus_vmstate_saving(const char *id) "id: %s" # iommufd.c +iommufd_change_process(int fd, bool ret) "fd=%d (%d)" iommufd_backend_connect(int fd, bool owned, uint32_t users) "fd=%d owned=%d users=%d" iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" diff --git a/include/system/iommufd.h b/include/system/iommufd.h index 2d24d93d17..db5f2c716c 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -69,6 +69,9 @@ bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, uint32_t *entry_num, void *data, Error **errp); +bool iommufd_change_process_capable(IOMMUFDBackend *be); +bool iommufd_change_process(IOMMUFDBackend *be, Error **errp); + #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, HOST_IOMMU_DEVICE_IOMMUFD) -- cgit 1.4.1 From 06c6a65852af0b7648cdb6ff6cf2e66929a7b5f5 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:49 -0700 Subject: vfio/iommufd: register container for cpr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register a vfio iommufd container and device for CPR, replacing the generic 
CPR register call with a more specific iommufd register call. Add a blocker if the kernel does not support IOMMU_IOAS_CHANGE_PROCESS. This is mostly boilerplate. The fields to be saved and restored are added in subsequent patches. Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-13-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 10 ++++++ hw/vfio/cpr-iommufd.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ hw/vfio/iommufd.c | 6 ++-- hw/vfio/meson.build | 1 + include/hw/vfio/vfio-cpr.h | 12 +++++++ include/system/iommufd.h | 1 + 6 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 hw/vfio/cpr-iommufd.c (limited to 'backends/iommufd.c') diff --git a/backends/iommufd.c b/backends/iommufd.c index 87f81a05f6..c554ce5385 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -108,6 +108,13 @@ bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) } be->fd = fd; } + if (!be->users && !vfio_iommufd_cpr_register_iommufd(be, errp)) { + if (be->owned) { + close(be->fd); + be->fd = -1; + } + return false; + } be->users++; trace_iommufd_backend_connect(be->fd, be->owned, be->users); @@ -125,6 +132,9 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) be->fd = -1; } out: + if (!be->users) { + vfio_iommufd_cpr_unregister_iommufd(be); + } trace_iommufd_backend_disconnect(be->fd, be->users); } diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c new file mode 100644 index 0000000000..2f58b43793 --- /dev/null +++ b/hw/vfio/cpr-iommufd.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2024-2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/vfio/vfio-cpr.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "system/iommufd.h" +#include "vfio-iommufd.h" + +static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp) +{ + if (!iommufd_change_process_capable(be)) { + if (errp) { + error_setg(errp, "vfio iommufd backend does not support " + "IOMMU_IOAS_CHANGE_PROCESS"); + } + return false; + } + return true; +} + +static const VMStateDescription iommufd_cpr_vmstate = { + .name = "iommufd", + .version_id = 0, + .minimum_version_id = 0, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp) +{ + Error **cpr_blocker = &be->cpr_blocker; + + if (!vfio_cpr_supported(be, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be); + + return true; +} + +void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be) +{ + vmstate_unregister(NULL, &iommufd_cpr_vmstate, be); + migrate_del_blocker(&be->cpr_blocker); +} + +bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container, + Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + vfio_cpr_add_kvm_notifier(); + + return true; +} + +void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); +} + +void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) +{ +} + +void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) +{ +} diff --git 
a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 962a1e2b1f..ff291be235 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -446,7 +446,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) if (!QLIST_EMPTY(&bcontainer->device_list)) { return; } - vfio_cpr_unregister_container(bcontainer); + vfio_iommufd_cpr_unregister_container(container); vfio_listener_unregister(bcontainer); iommufd_backend_free_id(container->be, container->ioas_id); object_unref(container); @@ -592,7 +592,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, goto err_listener_register; } - if (!vfio_cpr_register_container(bcontainer, errp)) { + if (!vfio_iommufd_cpr_register_container(container, errp)) { goto err_listener_register; } @@ -623,6 +623,7 @@ found_container: } vfio_device_prepare(vbasedev, bcontainer, &dev_info); + vfio_iommufd_cpr_register_device(vbasedev); trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, vbasedev->num_regions, vbasedev->flags); @@ -660,6 +661,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_container_destroy(container); vfio_address_space_put(space); + vfio_iommufd_cpr_unregister_device(vbasedev); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index 63ea393076..7a881740a6 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -31,6 +31,7 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files( )) system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files( 'iommufd.c', + 'cpr-iommufd.c', )) system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( 'display.c', diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index fa7d43ddd8..87b4206d81 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -15,7 +15,10 @@ struct VFIOContainer; struct VFIOContainerBase; struct VFIOGroup; +struct VFIODevice; struct VFIOPCIDevice; +struct 
VFIOIOMMUFDContainer; +struct IOMMUFDBackend; typedef int (*dma_map_fn)(const struct VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, void *vaddr, @@ -44,6 +47,15 @@ bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, Error **errp); void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); +bool vfio_iommufd_cpr_register_container(struct VFIOIOMMUFDContainer *container, + Error **errp); +void vfio_iommufd_cpr_unregister_container( + struct VFIOIOMMUFDContainer *container); +bool vfio_iommufd_cpr_register_iommufd(struct IOMMUFDBackend *be, Error **errp); +void vfio_iommufd_cpr_unregister_iommufd(struct IOMMUFDBackend *be); +void vfio_iommufd_cpr_register_device(struct VFIODevice *vbasedev); +void vfio_iommufd_cpr_unregister_device(struct VFIODevice *vbasedev); + int vfio_cpr_group_get_device_fd(int d, const char *name); bool vfio_cpr_container_match(struct VFIOContainer *container, diff --git a/include/system/iommufd.h b/include/system/iommufd.h index db5f2c716c..c9c72ffc45 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -32,6 +32,7 @@ struct IOMMUFDBackend { /*< protected >*/ int fd; /* /dev/iommu file descriptor */ bool owned; /* is the /dev/iommu opened internally */ + Error *cpr_blocker;/* set if be does not support CPR */ uint32_t users; /*< public >*/ -- cgit 1.4.1 From 2a3f0a59bd6479f75fa5335f82b85b4f9cd7ed4e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:52 -0700 Subject: vfio/iommufd: preserve descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save the iommu and vfio device fd in CPR state when it is created. After CPR, the fd number is found in CPR state and reused. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-16-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 35 +++++++++++++++++++++++++++++------ hw/vfio/cpr-iommufd.c | 10 ++++++++++ hw/vfio/device.c | 9 +-------- 3 files changed, 40 insertions(+), 14 deletions(-) (limited to 'backends/iommufd.c') diff --git a/backends/iommufd.c b/backends/iommufd.c index c554ce5385..e0917923bf 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -16,12 +16,18 @@ #include "qemu/module.h" #include "qom/object_interfaces.h" #include "qemu/error-report.h" +#include "migration/cpr.h" #include "monitor/monitor.h" #include "trace.h" #include "hw/vfio/vfio-device.h" #include #include +static const char *iommufd_fd_name(IOMMUFDBackend *be) +{ + return object_get_canonical_path_component(OBJECT(be)); +} + static void iommufd_backend_init(Object *obj) { IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); @@ -64,11 +70,27 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc) return !be->users; } +static void iommufd_backend_complete(UserCreatable *uc, Error **errp) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); + const char *name = iommufd_fd_name(be); + + if (!be->owned) { + /* fd came from the command line. Fetch updated value from cpr state. 
*/ + if (cpr_is_incoming()) { + be->fd = cpr_find_fd(name, 0); + } else { + cpr_save_fd(name, 0, be->fd); + } + } +} + static void iommufd_backend_class_init(ObjectClass *oc, const void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); ucc->can_be_deleted = iommufd_backend_can_be_deleted; + ucc->complete = iommufd_backend_complete; object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); } @@ -102,7 +124,7 @@ bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) int fd; if (be->owned && !be->users) { - fd = qemu_open("/dev/iommu", O_RDWR, errp); + fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp); if (fd < 0) { return false; } @@ -127,14 +149,15 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) goto out; } be->users--; - if (!be->users && be->owned) { - close(be->fd); - be->fd = -1; - } -out: if (!be->users) { vfio_iommufd_cpr_unregister_iommufd(be); + if (be->owned) { + cpr_delete_fd(iommufd_fd_name(be), 0); + close(be->fd); + be->fd = -1; + } } +out: trace_iommufd_backend_disconnect(be->fd, be->users); } diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c index 4166201e3f..a72b68daa8 100644 --- a/hw/vfio/cpr-iommufd.c +++ b/hw/vfio/cpr-iommufd.c @@ -166,12 +166,18 @@ void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) { if (!cpr_is_incoming()) { + /* + * Beware fd may have already been saved by vfio_device_set_fd, + * so call resave to avoid a duplicate entry. 
+ */ + cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); vfio_cpr_save_device(vbasedev); } } void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) { + cpr_delete_fd(vbasedev->name, 0); vfio_cpr_delete_device(vbasedev->name); } @@ -180,5 +186,9 @@ void vfio_cpr_load_device(VFIODevice *vbasedev) if (cpr_is_incoming()) { bool ret = vfio_cpr_find_device(vbasedev); g_assert(ret); + + if (vbasedev->fd < 0) { + vbasedev->fd = cpr_find_fd(vbasedev->name, 0); + } } } diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 0ae3f3c660..96cf21462c 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -351,14 +351,7 @@ void vfio_device_free_name(VFIODevice *vbasedev) void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) { - ERRP_GUARD(); - int fd = monitor_fd_param(monitor_cur(), str, errp); - - if (fd < 0) { - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } - vbasedev->fd = fd; + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); } static VFIODeviceIOOps vfio_device_io_ops_ioctl; -- cgit 1.4.1 From 6ff4cccd13155e718e630fe16a72d3cc9decde3b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:56 -0700 Subject: iommufd: preserve DMA mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During cpr-transfer load in new QEMU, the vfio_memory_listener causes spurious calls to map and unmap DMA regions, as devices are created and the address space is built. This memory was already mapped by the device in old QEMU, so suppress the map and unmap callbacks during incoming CPR. 
Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-20-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'backends/iommufd.c') diff --git a/backends/iommufd.c b/backends/iommufd.c index e0917923bf..2a33c7ab0b 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -245,6 +245,10 @@ int iommufd_backend_map_file_dma(IOMMUFDBackend *be, uint32_t ioas_id, .length = size, }; + if (cpr_is_incoming()) { + return 0; + } + if (!readonly) { map.flags |= IOMMU_IOAS_MAP_WRITEABLE; } @@ -274,6 +278,10 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, .length = size, }; + if (cpr_is_incoming()) { + return 0; + } + ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); /* * IOMMUFD takes mapping as some kind of object, unmapping -- cgit 1.4.1