From 4526687bf12624d957088cd40ee02540a5404546 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 17 Feb 2025 18:34:55 +0100 Subject: vfio: Add property documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Investigate the git history to uncover when and why the VFIO properties were introduced and update the models. This is mostly targeting vfio-pci device, since vfio-platform, vfio-ap and vfio-ccw devices are simpler. Sort the properties based on the QEMU version in which they were introduced. Cc: Tony Krowiak Cc: Eric Farman Cc: Eric Auger Reviewed-by: Kirti Wankhede Reviewed-by: Anthony Krowiak Reviewed-by: Eric Farman # vfio-ccw Reviewed-by: Alex Williamson Reviewed-by: Eric Auger Link: https://lore.kernel.org/qemu-devel/20250217173455.449983-1-clg@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/pci.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) (limited to 'hw/vfio/pci.c') diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 89d900e9cf..4f92b50b13 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3433,6 +3433,122 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) pdc->exit = vfio_exitfn; pdc->config_read = vfio_pci_read_config; pdc->config_write = vfio_pci_write_config; + + object_class_property_set_description(klass, /* 1.3 */ + "host", + "Host PCI address [domain:] of assigned device"); + object_class_property_set_description(klass, /* 1.3 */ + "x-intx-mmap-timeout-ms", + "When EOI is not provided by KVM/QEMU, wait time " + "(milliseconds) to re-enable device direct access " + "after INTx (DEBUG)"); + object_class_property_set_description(klass, /* 1.5 */ + "x-vga", + "Expose VGA address spaces for device"); + object_class_property_set_description(klass, /* 2.3 */ + "x-req", + "Disable device request notification support (DEBUG)"); + object_class_property_set_description(klass, /* 2.4 and 2.5 */ + "x-no-mmap", + "Disable MMAP 
for device. Allows to trace MMIO " + "accesses (DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-no-kvm-intx", + "Disable direct VFIO->KVM INTx injection. Allows to " + "trace INTx interrupts (DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-no-kvm-msi", + "Disable direct VFIO->KVM MSI injection. Allows to " + "trace MSI interrupts (DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-no-kvm-msix", + "Disable direct VFIO->KVM MSIx injection. Allows to " + "trace MSIx interrupts (DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-pci-vendor-id", + "Override PCI Vendor ID with provided value (DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-pci-device-id", + "Override PCI device ID with provided value (DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-pci-sub-vendor-id", + "Override PCI Subsystem Vendor ID with provided value " + "(DEBUG)"); + object_class_property_set_description(klass, /* 2.5 */ + "x-pci-sub-device-id", + "Override PCI Subsystem Device ID with provided value " + "(DEBUG)"); + object_class_property_set_description(klass, /* 2.6 */ + "sysfsdev", + "Host sysfs path of assigned device"); + object_class_property_set_description(klass, /* 2.7 */ + "x-igd-opregion", + "Expose host IGD OpRegion to guest"); + object_class_property_set_description(klass, /* 2.7 (See c4c45e943e51) */ + "x-igd-gms", + "Override IGD data stolen memory size (32MiB units)"); + object_class_property_set_description(klass, /* 2.11 */ + "x-nv-gpudirect-clique", + "Add NVIDIA GPUDirect capability indicating P2P DMA " + "clique for device [0-15]"); + object_class_property_set_description(klass, /* 2.12 */ + "x-no-geforce-quirks", + "Disable GeForce quirks (for NVIDIA Quadro/GRID/Tesla). " + "Improves performance"); + object_class_property_set_description(klass, /* 2.12 */ + "display", + "Enable display support for device, ex. 
vGPU"); + object_class_property_set_description(klass, /* 2.12 */ + "x-msix-relocation", + "Specify MSI-X MMIO relocation to the end of specified " + "existing BAR or new BAR to avoid virtualization overhead " + "due to adjacent device registers"); + object_class_property_set_description(klass, /* 3.0 */ + "x-no-kvm-ioeventfd", + "Disable registration of ioeventfds with KVM (DEBUG)"); + object_class_property_set_description(klass, /* 3.0 */ + "x-no-vfio-ioeventfd", + "Disable linking of KVM ioeventfds to VFIO ioeventfds " + "(DEBUG)"); + object_class_property_set_description(klass, /* 3.1 */ + "x-balloon-allowed", + "Override allowing ballooning with device (DEBUG, DANGER)"); + object_class_property_set_description(klass, /* 3.2 */ + "xres", + "Set X display resolution the vGPU should use"); + object_class_property_set_description(klass, /* 3.2 */ + "yres", + "Set Y display resolution the vGPU should use"); + object_class_property_set_description(klass, /* 5.2 */ + "x-pre-copy-dirty-page-tracking", + "Disable dirty pages tracking during iterative phase " + "(DEBUG)"); + object_class_property_set_description(klass, /* 5.2, 8.0 non-experimental */ + "enable-migration", + "Enable device migration. Also requires a host VFIO PCI " + "variant or mdev driver with migration support enabled"); + object_class_property_set_description(klass, /* 8.1 */ + "vf-token", + "Specify UUID VF token. Required for VF when PF is owned " + "by another VFIO driver"); +#ifdef CONFIG_IOMMUFD + object_class_property_set_description(klass, /* 9.0 */ + "iommufd", + "Set host IOMMUFD backend device"); +#endif + object_class_property_set_description(klass, /* 9.1 */ + "x-device-dirty-page-tracking", + "Disable device dirty page tracking and use " + "container-based dirty page tracking (DEBUG)"); + object_class_property_set_description(klass, /* 9.1 */ + "migration-events", + "Emit VFIO migration QAPI event when a VFIO device " + "changes its migration state. 
For management applications"); + object_class_property_set_description(klass, /* 9.1 */ + "skip-vsc-check", + "Skip config space check for Vendor Specific Capability. " + "Setting to false will enforce strict checking of VSC content " + "(DEBUG)"); } static const TypeInfo vfio_pci_dev_info = { @@ -3461,6 +3577,15 @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vfio_pci_dev_nohotplug_properties); dc->hotpluggable = false; + + object_class_property_set_description(klass, /* 3.1 */ + "ramfb", + "Enable ramfb to provide pre-boot graphics for devices " + "enabling display option"); + object_class_property_set_description(klass, /* 8.2 */ + "x-ramfb-migrate", + "Override default migration support for ramfb support " + "(DEBUG)"); } static const TypeInfo vfio_pci_nohotplug_dev_info = { -- cgit 1.4.1 From 0681ec253141d838210b3c5e6bc0d2d71f2e111e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 25 Feb 2025 14:52:26 -0700 Subject: pci: Use PCI PM capability initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch callers directly initializing the PCI PM capability with pci_add_capability() to use pci_pm_init(). Cc: Dmitry Fleytman Cc: Akihiko Odaki Cc: Jason Wang Cc: Stefan Weil Cc: Sriram Yagnaraman Cc: Keith Busch Cc: Klaus Jensen Cc: Jesper Devantier Cc: Michael S. Tsirkin Cc: Marcel Apfelbaum Cc: Cédric Le Goater Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Reviewed-by: Akihiko Odaki Reviewed-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-3-alex.williamson@redhat.com Signed-off-by: Cédric Le Goater --- hw/net/e1000e.c | 3 +-- hw/net/eepro100.c | 4 +--- hw/net/igb.c | 3 +-- hw/nvme/ctrl.c | 3 +-- hw/pci-bridge/pcie_pci_bridge.c | 2 +- hw/vfio/pci.c | 7 ++++++- hw/virtio/virtio-pci.c | 3 +-- 7 files changed, 12 insertions(+), 13 deletions(-) (limited to 'hw/vfio/pci.c') diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index f637853073..b72cbab7e8 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -372,8 +372,7 @@ static int e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) { Error *local_err = NULL; - int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, - PCI_PM_SIZEOF, &local_err); + int ret = pci_pm_init(pdev, offset, &local_err); if (local_err) { error_report_err(local_err); diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index 6d853229ae..29a39865a6 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -551,9 +551,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp) if (info->power_management) { /* Power Management Capabilities */ int cfg_offset = 0xdc; - int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM, - cfg_offset, PCI_PM_SIZEOF, - errp); + int r = pci_pm_init(&s->dev, cfg_offset, errp); if (r < 0) { return; } diff --git a/hw/net/igb.c b/hw/net/igb.c index c965fc2fb6..e318df40e0 100644 --- a/hw/net/igb.c +++ b/hw/net/igb.c @@ -356,8 +356,7 @@ static int igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) { Error *local_err = NULL; - int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, - PCI_PM_SIZEOF, &local_err); + int ret = pci_pm_init(pdev, offset, &local_err); if (local_err) { error_report_err(local_err); diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index e62c6a3588..518d02dc66 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8600,8 +8600,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) Error *err = NULL; int ret; - ret = 
pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, - PCI_PM_SIZEOF, &err); + ret = pci_pm_init(pci_dev, offset, &err); if (err) { error_report_err(err); return ret; diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index fd4514a595..9fa656b43b 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -52,7 +52,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp) goto cap_error; } - pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp); + pos = pci_pm_init(d, 0, errp); if (pos < 0) { goto pm_error; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 4f92b50b13..d33b795af0 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2216,7 +2216,12 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) case PCI_CAP_ID_PM: vfio_check_pm_reset(vdev, pos); vdev->pm_cap = pos; - ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0; + ret = pci_pm_init(pdev, pos, errp) >= 0; + /* + * PCI-core config space emulation needs write access to the power + * state enabled for tracking BAR mapping relative to PM state. 
+ */ + pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK); break; case PCI_CAP_ID_AF: vfio_check_af_flr(vdev, pos); diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index c773a9130c..afe8b5551c 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2204,8 +2204,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); - pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, - PCI_PM_SIZEOF, errp); + pos = pci_pm_init(pci_dev, 0, errp); if (pos < 0) { return; } -- cgit 1.4.1 From 05c6a8eff6298675080aa2692ee05a310b3483b4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 25 Feb 2025 14:52:27 -0700 Subject: vfio/pci: Delete local pm_cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is now redundant to PCIDevice.pm_cap. Cc: Cédric Le Goater Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-4-alex.williamson@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/pci.c | 9 ++++----- hw/vfio/pci.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'hw/vfio/pci.c') diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index d33b795af0..a8db19d8d2 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2215,7 +2215,6 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) break; case PCI_CAP_ID_PM: vfio_check_pm_reset(vdev, pos); - vdev->pm_cap = pos; ret = pci_pm_init(pdev, pos, errp) >= 0; /* * PCI-core config space emulation needs write access to the power @@ -2412,17 +2411,17 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) vfio_disable_interrupts(vdev); /* Make sure the device is in D0 */ - if (vdev->pm_cap) { + if (pdev->pm_cap) { uint16_t pmcsr; uint8_t state; - pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2); + pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); state = pmcsr & PCI_PM_CTRL_STATE_MASK; if (state) { pmcsr &= ~PCI_PM_CTRL_STATE_MASK; - vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); + vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); /* vfio handles the necessary delay here */ - pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2); + pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); state = pmcsr & PCI_PM_CTRL_STATE_MASK; if (state) { error_report("vfio: Unable to power on device, stuck in D%d", diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 43c166680a..d638c781f6 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -160,7 +160,6 @@ struct VFIOPCIDevice { int32_t bootindex; uint32_t igd_gms; OffAutoPCIBAR msix_relo; - uint8_t pm_cap; uint8_t nv_gpudirect_clique; bool pci_aer; bool req_enabled; -- cgit 1.4.1 From 518a69a598916749338de3852d41d961d4503115 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 25 Feb 2025 14:52:29 -0700 Subject: 
hw/vfio/pci: Re-order pre-reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want the device in the D0 power state going into reset, but the config write can enable the BARs in the address space, which are then removed from the address space once we clear the memory enable bit in the command register. Re-order to clear the command bit first, so the power state change doesn't enable the BARs. Cc: Cédric Le Goater Reviewed-by: Zhenzhong Duan Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-6-alex.williamson@redhat.com Signed-off-by: Cédric Le Goater --- hw/vfio/pci.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'hw/vfio/pci.c') diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a8db19d8d2..c1cee280ae 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2410,6 +2410,15 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) vfio_disable_interrupts(vdev); + /* + * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. + * Also put INTx Disable in known state. + */ + cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | + PCI_COMMAND_INTX_DISABLE); + vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); + /* Make sure the device is in D0 */ if (pdev->pm_cap) { uint16_t pmcsr; @@ -2429,15 +2438,6 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) } } } - - /* - * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. - * Also put INTx Disable in known state. - */ - cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); - cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | - PCI_COMMAND_INTX_DISABLE); - vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); } void vfio_pci_post_reset(VFIOPCIDevice *vdev) -- cgit 1.4.1 From 623af41dd331d1a57a41bc3374e3d134adb33f4c Mon Sep 17 00:00:00 2001 From: "Maciej S. 
Szmigiero" Date: Tue, 4 Mar 2025 23:03:58 +0100 Subject: vfio/migration: Add x-migration-multifd-transfer VFIO property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This property allows configuring whether to transfer the particular device state via multifd channels when live migrating that device. It defaults to AUTO, which means that VFIO device state transfer via multifd channels is attempted in configurations that otherwise support it. Signed-off-by: Maciej S. Szmigiero Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/d6dbb326e3d53c7104d62c96c9e3dd64e1c7b940.1741124640.git.maciej.szmigiero@oracle.com [ clg: Added documentation ] Signed-off-by: Cédric Le Goater --- docs/devel/migration/vfio.rst | 15 +++++++++++++++ hw/vfio/migration-multifd.c | 18 +++++++++++++++++- hw/vfio/pci.c | 7 +++++++ include/hw/vfio/vfio-common.h | 2 ++ 4 files changed, 41 insertions(+), 1 deletion(-) (limited to 'hw/vfio/pci.c') diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst index a803a09bc1..673e354754 100644 --- a/docs/devel/migration/vfio.rst +++ b/docs/devel/migration/vfio.rst @@ -232,3 +232,18 @@ Postcopy ======== Postcopy migration is currently not supported for VFIO devices. + +Multifd +======= + +Starting from QEMU version 10.0 there's a possibility to transfer VFIO device +_STOP_COPY state via multifd channels. This helps reduce downtime - especially +with multiple VFIO devices or with devices having a large migration state. +As an additional benefit, setting the VFIO device to _STOP_COPY state and +saving its config space is also parallelized (run in a separate thread) in +such migration mode. + +The multifd VFIO device state transfer is controlled by +"x-migration-multifd-transfer" VFIO device property. This property defaults to +AUTO, which means that VFIO device state transfer via multifd channels is +attempted in configurations that otherwise support it. 
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index bfb9a72fa4..aacddc503b 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -476,18 +476,34 @@ bool vfio_multifd_transfer_supported(void) bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev) { - return false; + VFIOMigration *migration = vbasedev->migration; + + return migration->multifd_transfer; } bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) { VFIOMigration *migration = vbasedev->migration; + if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) { + migration->multifd_transfer = vfio_multifd_transfer_supported(); + } else { + migration->multifd_transfer = + vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON; + } + if (!vfio_multifd_transfer_enabled(vbasedev)) { /* Nothing further to check or do */ return true; } + if (!vfio_multifd_transfer_supported()) { + error_setg(errp, + "%s: Multifd device transfer requested but unsupported in the current config", + vbasedev->name); + return false; + } + if (alloc_multifd) { assert(!migration->multifd); migration->multifd = vfio_multifd_new(); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c1cee280ae..1bbf15cea3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3381,6 +3381,9 @@ static const Property vfio_pci_dev_properties[] = { VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, vbasedev.enable_migration, ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice, + vbasedev.migration_multifd_transfer, + ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), @@ -3553,6 +3556,10 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) "Skip config space check for Vendor Specific Capability. 
" "Setting to false will enforce strict checking of VSC content " "(DEBUG)"); + object_class_property_set_description(klass, /* 10.0 */ + "x-migration-multifd-transfer", + "Transfer this device state via " + "multifd channels when live migrating it"); } static const TypeInfo vfio_pci_dev_info = { diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 961931d9f4..04b123a6c9 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -91,6 +91,7 @@ typedef struct VFIOMigration { uint64_t mig_flags; uint64_t precopy_init_size; uint64_t precopy_dirty_size; + bool multifd_transfer; VFIOMultifd *multifd; bool initial_data_sent; @@ -153,6 +154,7 @@ typedef struct VFIODevice { bool no_mmap; bool ram_block_discard_allowed; OnOffAuto enable_migration; + OnOffAuto migration_multifd_transfer; bool migration_events; VFIODeviceOps *ops; unsigned int num_irqs; -- cgit 1.4.1 From 4c765ceaace4e7828e2790d8f4829f69989888de Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Tue, 4 Mar 2025 23:03:59 +0100 Subject: vfio/migration: Make x-migration-multifd-transfer VFIO property mutable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DEFINE_PROP_ON_OFF_AUTO() property isn't runtime-mutable so using it would mean that the source VM would need to decide upfront at startup time whether it wants to do a multifd device state transfer at some point. Source VM can run for a long time before being migrated so it is desirable to have a fallback mechanism to the old way of transferring VFIO device state if it turns to be necessary. This brings this property to the same mutability level as ordinary migration parameters, which too can be adjusted at the run time. Signed-off-by: Maciej S. 
Szmigiero Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/f2f2d66bda477da3e6cb8c0311006cff36e8651d.1741124640.git.maciej.szmigiero@oracle.com Signed-off-by: Cédric Le Goater --- hw/vfio/migration-multifd.c | 4 ++++ hw/vfio/pci.c | 20 +++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'hw/vfio/pci.c') diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c index aacddc503b..233724710b 100644 --- a/hw/vfio/migration-multifd.c +++ b/hw/vfio/migration-multifd.c @@ -485,6 +485,10 @@ bool vfio_multifd_setup(VFIODevice *vbasedev, bool alloc_multifd, Error **errp) { VFIOMigration *migration = vbasedev->migration; + /* + * Make a copy of this setting at the start in case it is changed + * mid-migration. + */ if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) { migration->multifd_transfer = vfio_multifd_transfer_supported(); } else { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 1bbf15cea3..fdbc15885d 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3357,6 +3357,8 @@ static void vfio_instance_init(Object *obj) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } +static PropertyInfo vfio_pci_migration_multifd_transfer_prop; + static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), @@ -3381,9 +3383,10 @@ static const Property vfio_pci_dev_properties[] = { VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, vbasedev.enable_migration, ON_OFF_AUTO_AUTO), - DEFINE_PROP_ON_OFF_AUTO("x-migration-multifd-transfer", VFIOPCIDevice, - vbasedev.migration_multifd_transfer, - ON_OFF_AUTO_AUTO), + DEFINE_PROP("x-migration-multifd-transfer", VFIOPCIDevice, + vbasedev.migration_multifd_transfer, + vfio_pci_migration_multifd_transfer_prop, OnOffAuto, + .set_default = true, .defval.i = ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("migration-events", 
VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), @@ -3608,6 +3611,17 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { static void register_vfio_pci_dev_type(void) { + /* + * Ordinary ON_OFF_AUTO property isn't runtime-mutable, but source VM can + * run for a long time before being migrated so it is desirable to have a + * fallback mechanism to the old way of transferring VFIO device state if + * it turns out to be necessary. + * The following makes this type of property have the same mutability level + * as ordinary migration parameters. + */ + vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto; + vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true; + type_register_static(&vfio_pci_dev_info); type_register_static(&vfio_pci_nohotplug_dev_info); } -- cgit 1.4.1