From b9b389b9e0f08b207786d501b790dab9cb2c09de Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:46 -0700 Subject: vfio/iommufd: invariant device name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpr-transfer will use the device name as a key to find the value of the device descriptor in new QEMU. However, if the descriptor number is specified by a command-line fd parameter, then vfio_device_get_name creates a name that includes the fd number. This causes a chicken-and-egg problem: new QEMU must know the fd number to construct a name to find the fd number. To fix, create an invariant name based on the id command-line parameter, if id is defined. The user will need to provide such an id to use CPR. Signed-off-by: Steve Sistare Reviewed-by: Cédric Le Goater Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-10-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- hw/vfio/device.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'hw/vfio/device.c') diff --git a/hw/vfio/device.c b/hw/vfio/device.c index d91c695b69..3cd365fb8b 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -316,12 +316,17 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) error_setg(errp, "Use FD passing only with iommufd backend"); return false; } - /* - * Give a name with fd so any function printing out vbasedev->name - * will not break. - */ if (!vbasedev->name) { - vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + + if (vbasedev->dev->id) { + vbasedev->name = g_strdup(vbasedev->dev->id); + return true; + } else { + /* + * Assign a name so any function printing it will not break. + */ + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + } } } -- cgit 1.4.1 From 184053f04f6ad6b2950d4712063ffed43bb2720f Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:47 -0700 Subject: vfio/iommufd: add vfio_device_free_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define vfio_device_free_name to free the name created by vfio_device_get_name. A subsequent patch will do more there. No functional change. Signed-off-by: Steve Sistare Reviewed-by: Cédric Le Goater Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-11-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- hw/vfio/ap.c | 4 ++-- hw/vfio/ccw.c | 4 ++-- hw/vfio/device.c | 5 +++++ hw/vfio/pci.c | 2 +- hw/vfio/platform.c | 2 +- include/hw/vfio/vfio-device.h | 1 + 6 files changed, 12 insertions(+), 6 deletions(-) (limited to 'hw/vfio/device.c') diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1df4438149..7719f24579 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); } static void vfio_ap_unrealize(DeviceState *dev) @@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev) vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX); vfio_device_detach(&vapdev->vdev); - g_free(vapdev->vdev.name); + vfio_device_free_name(&vapdev->vdev); } static const Property vfio_ap_properties[] = { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index cea9d6e005..9560b8d851 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -619,7 +619,7 @@ out_io_notifier_err: out_region_err: vfio_device_detach(vbasedev); out_attach_dev_err: - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); out_unrealize: if (cdc->unrealize) { cdc->unrealize(cdev); @@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev) vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); vfio_device_detach(&vcdev->vdev); - g_free(vcdev->vdev.name); + vfio_device_free_name(&vcdev->vdev); if (cdc->unrealize) { cdc->unrealize(cdev); diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 3cd365fb8b..97eddd04f5 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -333,6 +333,11 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) return true; } +void vfio_device_free_name(VFIODevice *vbasedev) +{ + g_clear_pointer(&vbasedev->name, g_free); +} + void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) { ERRP_GUARD(); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index dd0b2a0b94..1093b28df7 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2996,7 +2996,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev) vfio_device_detach(&vdev->vbasedev); - g_free(vdev->vbasedev.name); + vfio_device_free_name(&vdev->vbasedev); g_free(vdev->msix); } diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 9a21f2e50a..5c1795a26f 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { /* @fd takes precedence over @sysfsdev which takes precedence over @host */ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { - g_free(vbasedev->name); + vfio_device_free_name(vbasedev); vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); } else if (vbasedev->fd < 0) { if (!vbasedev->name || strchr(vbasedev->name, '/')) { diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index f503837ccc..1901a35aa9 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -279,6 +279,7 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, /* Returns 0 on success, or a negative errno. */ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +void vfio_device_free_name(VFIODevice *vbasedev); void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard); -- cgit 1.4.1 From a434fd8f6462c1541927d22e07c58425d6cbd84b Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:48 -0700 Subject: vfio/iommufd: device name blocker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an invariant device name cannot be created, block CPR. Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-12-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- hw/vfio/device.c | 11 +++++++++++ include/hw/vfio/vfio-cpr.h | 1 + 2 files changed, 12 insertions(+) (limited to 'hw/vfio/device.c') diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 97eddd04f5..0ae3f3c660 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -28,6 +28,8 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/units.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "monitor/monitor.h" #include "vfio-helpers.h" @@ -324,8 +326,16 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) } else { /* * Assign a name so any function printing it will not break. + * The fd number changes across processes, so this cannot be + * used as an invariant name for CPR. */ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + error_setg(&vbasedev->cpr.id_blocker, + "vfio device with fd=%d needs an id property", + vbasedev->fd); + return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, + errp, MIG_MODE_CPR_TRANSFER, + -1) == 0; } } } @@ -336,6 +346,7 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) void vfio_device_free_name(VFIODevice *vbasedev) { g_clear_pointer(&vbasedev->name, g_free); + migrate_del_blocker(&vbasedev->cpr.id_blocker); } void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index d37acc4a0a..fa7d43ddd8 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -30,6 +30,7 @@ typedef struct VFIOContainerCPR { typedef struct VFIODeviceCPR { Error *mdev_blocker; + Error *id_blocker; } VFIODeviceCPR; bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, -- cgit 1.4.1 From 2a3f0a59bd6479f75fa5335f82b85b4f9cd7ed4e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 2 Jul 2025 14:58:52 -0700 Subject: vfio/iommufd: preserve descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save the iommu and vfio device fd in CPR state when it is created. After CPR, the fd number is found in CPR state and reused. Signed-off-by: Steve Sistare Reviewed-by: Zhenzhong Duan Link: https://lore.kernel.org/qemu-devel/1751493538-202042-16-git-send-email-steven.sistare@oracle.com Signed-off-by: Cédric Le Goater --- backends/iommufd.c | 35 +++++++++++++++++++++++++++++------ hw/vfio/cpr-iommufd.c | 10 ++++++++++ hw/vfio/device.c | 9 +-------- 3 files changed, 40 insertions(+), 14 deletions(-) (limited to 'hw/vfio/device.c') diff --git a/backends/iommufd.c b/backends/iommufd.c index c554ce5385..e0917923bf 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -16,12 +16,18 @@ #include "qemu/module.h" #include "qom/object_interfaces.h" #include "qemu/error-report.h" +#include "migration/cpr.h" #include "monitor/monitor.h" #include "trace.h" #include "hw/vfio/vfio-device.h" #include #include +static const char *iommufd_fd_name(IOMMUFDBackend *be) +{ + return object_get_canonical_path_component(OBJECT(be)); +} + static void iommufd_backend_init(Object *obj) { IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); @@ -64,11 +70,27 @@ static bool iommufd_backend_can_be_deleted(UserCreatable *uc) return !be->users; } +static void iommufd_backend_complete(UserCreatable *uc, Error **errp) +{ + IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); + const char *name = iommufd_fd_name(be); + + if (!be->owned) { + /* fd came from the command line. Fetch updated value from cpr state. */ + if (cpr_is_incoming()) { + be->fd = cpr_find_fd(name, 0); + } else { + cpr_save_fd(name, 0, be->fd); + } + } +} + static void iommufd_backend_class_init(ObjectClass *oc, const void *data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); ucc->can_be_deleted = iommufd_backend_can_be_deleted; + ucc->complete = iommufd_backend_complete; object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); } @@ -102,7 +124,7 @@ bool iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) int fd; if (be->owned && !be->users) { - fd = qemu_open("/dev/iommu", O_RDWR, errp); + fd = cpr_open_fd("/dev/iommu", O_RDWR, iommufd_fd_name(be), 0, errp); if (fd < 0) { return false; } @@ -127,14 +149,15 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) goto out; } be->users--; - if (!be->users && be->owned) { - close(be->fd); - be->fd = -1; - } -out: if (!be->users) { vfio_iommufd_cpr_unregister_iommufd(be); + if (be->owned) { + cpr_delete_fd(iommufd_fd_name(be), 0); + close(be->fd); + be->fd = -1; + } } +out: trace_iommufd_backend_disconnect(be->fd, be->users); } diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c index 4166201e3f..a72b68daa8 100644 --- a/hw/vfio/cpr-iommufd.c +++ b/hw/vfio/cpr-iommufd.c @@ -166,12 +166,18 @@ void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container) void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev) { if (!cpr_is_incoming()) { + /* + * Beware fd may have already been saved by vfio_device_set_fd, + * so call resave to avoid a duplicate entry. + */ + cpr_resave_fd(vbasedev->name, 0, vbasedev->fd); vfio_cpr_save_device(vbasedev); } } void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev) { + cpr_delete_fd(vbasedev->name, 0); vfio_cpr_delete_device(vbasedev->name); } @@ -180,5 +186,9 @@ void vfio_cpr_load_device(VFIODevice *vbasedev) if (cpr_is_incoming()) { bool ret = vfio_cpr_find_device(vbasedev); g_assert(ret); + + if (vbasedev->fd < 0) { + vbasedev->fd = cpr_find_fd(vbasedev->name, 0); + } } } diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 0ae3f3c660..96cf21462c 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -351,14 +351,7 @@ void vfio_device_free_name(VFIODevice *vbasedev) void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) { - ERRP_GUARD(); - int fd = monitor_fd_param(monitor_cur(), str, errp); - - if (fd < 0) { - error_prepend(errp, "Could not parse remote object fd %s:", str); - return; - } - vbasedev->fd = fd; + vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp); } static VFIODeviceIOOps vfio_device_io_ops_ioctl; -- cgit 1.4.1