diff options
61 files changed, 664 insertions, 428 deletions
diff --git a/configs/targets/s390x-linux-user.mak b/configs/targets/s390x-linux-user.mak index e2978248ed..24c04c8589 100644 --- a/configs/targets/s390x-linux-user.mak +++ b/configs/targets/s390x-linux-user.mak @@ -2,4 +2,4 @@ TARGET_ARCH=s390x TARGET_SYSTBL_ABI=common,64 TARGET_SYSTBL=syscall.tbl TARGET_BIG_ENDIAN=y -TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-gs.xml +TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/configs/targets/s390x-softmmu.mak b/configs/targets/s390x-softmmu.mak index 258b4cf358..70d2f9f0ba 100644 --- a/configs/targets/s390x-softmmu.mak +++ b/configs/targets/s390x-softmmu.mak @@ -1,4 +1,4 @@ TARGET_ARCH=s390x TARGET_BIG_ENDIAN=y TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-gs.xml +TARGET_XML_FILES= gdb-xml/s390x-core64.xml gdb-xml/s390-acr.xml gdb-xml/s390-fpr.xml gdb-xml/s390-vx.xml gdb-xml/s390-cr.xml gdb-xml/s390-virt.xml gdb-xml/s390-virt-kvm.xml gdb-xml/s390-gs.xml diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt index 343120f2ef..a3e949f6b3 100644 --- a/docs/devel/multiple-iothreads.txt +++ b/docs/devel/multiple-iothreads.txt @@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier * LEGACY timer_new_ms() - create a timer * LEGACY qemu_bh_new() - create a BH + * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard * LEGACY qemu_aio_wait() - run an event loop iteration Since they implicitly work on the main loop they cannot be used in code that @@ -72,8 +73,14 @@ Instead, use the 
AioContext functions directly (see include/block/aio.h): * aio_set_event_notifier() - monitor an event notifier * aio_timer_new() - create a timer * aio_bh_new() - create a BH + * aio_bh_new_guarded() - create a BH with a device re-entrancy guard * aio_poll() - run an event loop iteration +The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" +argument, which is used to check for and prevent re-entrancy problems. For +BHs associated with devices, the reentrancy-guard is contained in the +corresponding DeviceState and named "mem_reentrancy_guard". + The AioContext can be obtained from the IOThread using iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). Code that takes an AioContext argument works both in IOThreads or the main diff --git a/gdb-xml/s390-virt-kvm.xml b/gdb-xml/s390-virt-kvm.xml new file mode 100644 index 0000000000..a256eddaf5 --- /dev/null +++ b/gdb-xml/s390-virt-kvm.xml @@ -0,0 +1,14 @@ +<?xml version="1.0"?> +<!-- Copyright 2023 IBM Corp. + + This work is licensed under the terms of the GNU GPL, version 2 or + (at your option) any later version. See the COPYING file in the + top-level directory. 
--> + +<!DOCTYPE feature SYSTEM "gdb-target.dtd"> +<feature name="org.gnu.gdb.s390.virt.kvm"> + <reg name="pp" bitsize="64" type="uint64" group="system"/> + <reg name="pfault_token" bitsize="64" type="uint64" group="system"/> + <reg name="pfault_select" bitsize="64" type="uint64" group="system"/> + <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/> +</feature> diff --git a/gdb-xml/s390-virt.xml b/gdb-xml/s390-virt.xml index e2e9a7ad3c..438eb68aab 100644 --- a/gdb-xml/s390-virt.xml +++ b/gdb-xml/s390-virt.xml @@ -11,8 +11,4 @@ <reg name="cputm" bitsize="64" type="uint64" group="system"/> <reg name="last_break" bitsize="64" type="code_ptr" group="system"/> <reg name="prefix" bitsize="64" type="data_ptr" group="system"/> - <reg name="pp" bitsize="64" type="uint64" group="system"/> - <reg name="pfault_token" bitsize="64" type="uint64" group="system"/> - <reg name="pfault_select" bitsize="64" type="uint64" group="system"/> - <reg name="pfault_compare" bitsize="64" type="uint64" group="system"/> </feature> diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 74f3a05f88..0e266c552b 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { int num_rings; Xen9pfsRing *rings; + MemReentrancyGuard mem_reentrancy_guard; } Xen9pfsDev; static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); @@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + XEN_FLEX_RING_SIZE(ring_order); - xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); + xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, + &xen_9pdev->rings[i], + &xen_9pdev->mem_reentrancy_guard); xen_9pdev->rings[i].out_cons = 0; xen_9pdev->rings[i].out_size = 0; xen_9pdev->rings[i].inprogress = false; diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index b28d81737e..a6202997ee 100644 --- 
a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, } else { s->ctx = qemu_get_aio_context(); } - s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); + s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, + &DEVICE(vdev)->mem_reentrancy_guard); s->batch_notify_vqs = bitmap_new(conf->num_queues); *dataplane = s; diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 734da42ea7..d8bc39d359 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, } else { dataplane->ctx = qemu_get_aio_context(); } - dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, - dataplane); + dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, + dataplane, + &DEVICE(xendev)->mem_reentrancy_guard); return dataplane; } diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index 7d4601cb5d..dd619f0731 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) return; } - port->bh = qemu_bh_new(flush_queued_data_bh, port); + port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, + &dev->mem_reentrancy_guard); port->elem = NULL; } diff --git a/hw/core/machine.c b/hw/core/machine.c index 2ce97a5d3b..47a34841a5 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -39,7 +39,9 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" -GlobalProperty hw_compat_8_0[] = {}; +GlobalProperty hw_compat_8_0[] = { + { "migration", "multifd-flush-after-each-section", "on"}, +}; const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0); GlobalProperty hw_compat_7_2[] = { diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 80ce1e9a93..f1c0eb7dfc 100644 --- a/hw/display/qxl.c +++ 
b/hw/display/qxl.c @@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); - qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); + qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, + &DEVICE(qxl)->mem_reentrancy_guard); qxl_reset_state(qxl); - qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); - qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); + qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, + &DEVICE(qxl)->mem_reentrancy_guard); + qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, + &DEVICE(qxl)->mem_reentrancy_guard); } static void qxl_realize_primary(PCIDevice *dev, Error **errp) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 5e15c79b94..66ac9b6cc5 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) g->ctrl_vq = virtio_get_queue(vdev, 0); g->cursor_vq = virtio_get_queue(vdev, 1); - g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); - g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); + g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, + &qdev->mem_reentrancy_guard); + g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, + &qdev->mem_reentrancy_guard); QTAILQ_INIT(&g->reslist); QTAILQ_INIT(&g->cmdq); QTAILQ_INIT(&g->fenceq); diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 55902e1df7..4e76d6b191 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) ahci_write_fis_d2h(ad); if (ad->port_regs.cmd_issue && !ad->check_bh) { - ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); + ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, + &ad->mem_reentrancy_guard); qemu_bh_schedule(ad->check_bh); } } diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h 
index 303fcd7235..2480455372 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -321,6 +321,7 @@ struct AHCIDevice { bool init_d2h_sent; AHCICmdHdr *cur_cmd; NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; + MemReentrancyGuard mem_reentrancy_guard; }; struct AHCIPCIState { diff --git a/hw/ide/core.c b/hw/ide/core.c index 45d14a25e9..de48ff9f86 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( BlockCompletionFunc *cb, void *cb_opaque, void *opaque) { IDEState *s = opaque; + IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; TrimAIOCB *iocb; /* Paired with a decrement in ide_trim_bh_cb() */ @@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); iocb->s = s; - iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); + iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, + &DEVICE(dev)->mem_reentrancy_guard); iocb->ret = 0; iocb->qiov = qiov; iocb->i = -1; diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 20b5a94073..ac3d47d231 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", APIC_SPACE_SIZE); + /* + * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can + * write back to apic-msi. As such mark the apic-msi region re-entrancy + * safe. 
+ */ + s->io_memory.disable_reentrancy_guard = true; + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); local_apics[s->id] = s; diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c index 890ae7bae5..de056ea2df 100644 --- a/hw/misc/bcm2835_property.c +++ b/hw/misc/bcm2835_property.c @@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, TYPE_BCM2835_PROPERTY, 0x10); + + /* + * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from + * iomem. As such, mark iomem as re-entrancy safe. + */ + s->iomem.disable_reentrancy_guard = true; + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); } diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c index 632c03779c..082c6980ad 100644 --- a/hw/misc/imx_rngc.c +++ b/hw/misc/imx_rngc.c @@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); - s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); - s->seed_bh = qemu_bh_new(imx_rngc_seed, s); + s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, + &dev->mem_reentrancy_guard); + s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, + &dev->mem_reentrancy_guard); } static void imx_rngc_reset(DeviceState *dev) diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c index 43bb1f56ba..80a789f32b 100644 --- a/hw/misc/macio/mac_dbdma.c +++ b/hw/misc/macio/mac_dbdma.c @@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) { DBDMAState *s = MAC_DBDMA(dev); - s->bh = qemu_bh_new(DBDMA_run_bh, s); + s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); } static void mac_dbdma_class_init(ObjectClass *oc, void *data) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 53e1c32643..447f669921 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2917,7 +2917,8
@@ static void virtio_net_add_queue(VirtIONet *n, int index) n->vqs[index].tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size, virtio_net_handle_tx_bh); - n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); + n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], + &DEVICE(vdev)->mem_reentrancy_guard); } n->vqs[index].tx_waiting = 0; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index f59dfe1cbe..fd917fcda1 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } - sq->bh = qemu_bh_new(nvme_process_sq, sq); + sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, + &DEVICE(sq->ctrl)->mem_reentrancy_guard); if (n->dbbuf_enabled) { sq->db_addr = n->dbbuf_dbs + (sqid << 3); @@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, } } n->cq[cqid] = cq; - cq->bh = qemu_bh_new(nvme_post_cqes, cq); + cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, + &DEVICE(cq->ctrl)->mem_reentrancy_guard); } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 072ffe3c5e..9a11ac4b2b 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + /* + * Raven's raven_io_ops use the address-space API to access pci-conf-idx + * (which is also owned by the raven device). As such, mark the + * pci_io_non_contiguous as re-entrancy safe. 
+ */ + s->pci_io_non_contiguous.disable_reentrancy_guard = true; + /* CPU address space */ memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, &s->pci_io); diff --git a/hw/rdma/Kconfig b/hw/rdma/Kconfig index 8e2211288f..840320bdc0 100644 --- a/hw/rdma/Kconfig +++ b/hw/rdma/Kconfig @@ -1,3 +1,3 @@ config VMW_PVRDMA default y if PCI_DEVICES - depends on PVRDMA && PCI && MSI_NONBROKEN + depends on PVRDMA && MSI_NONBROKEN && VMXNET3_PCI diff --git a/hw/rdma/meson.build b/hw/rdma/meson.build index 7325f40c32..fc7917192f 100644 --- a/hw/rdma/meson.build +++ b/hw/rdma/meson.build @@ -1,10 +1,12 @@ -specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( +softmmu_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( 'rdma.c', 'rdma_backend.c', - 'rdma_rm.c', 'rdma_utils.c', + 'vmw/pvrdma_qp_ops.c', +)) +specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files( + 'rdma_rm.c', 'vmw/pvrdma_cmd.c', 'vmw/pvrdma_dev_ring.c', 'vmw/pvrdma_main.c', - 'vmw/pvrdma_qp_ops.c', )) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index cfd85de3e6..038d564433 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -23,10 +23,6 @@ #include "rdma_backend.h" #include "rdma_rm.h" -/* Page directory and page tables */ -#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } -#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } - void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf) { g_string_append_printf(buf, "\ttx : %" PRId64 "\n", diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index af93557a9a..db27872963 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, "lsi-io", 256); + /* + * Since we use the address-space API to interact with ram_io, disable the + * re-entrancy guard. 
+ */ + s->ram_io.disable_reentrancy_guard = true; + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); qdev_init_gpio_out(d, &s->ext_irq, 1); diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index c485da792c..3de288b454 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) } s->max_devices = MPTSAS_NUM_PORTS; - s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); + s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, + &DEVICE(dev)->mem_reentrancy_guard); scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); } diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index c97176110c..3c20b47ad0 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) AioContext *ctx = blk_get_aio_context(s->conf.blk); /* The reference is dropped in scsi_dma_restart_bh.*/ object_ref(OBJECT(s)); - s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); + s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, + &DEVICE(s)->mem_reentrancy_guard); qemu_bh_schedule(s->bh); } } diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index fa76696855..4de34536e9 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); } - s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); + s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, + &DEVICE(pci_dev)->mem_reentrancy_guard); scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); /* override default SCSI bus hotplug-handler, with pvscsi's one */ diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index 88f99c05d5..f013ded91e 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice 
*dev, Error **errp) QTAILQ_INIT(&uas->results); QTAILQ_INIT(&uas->requests); - uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); + uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, + &d->mem_reentrancy_guard); dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c index 8755e9cbb0..a0c4e782b2 100644 --- a/hw/usb/hcd-dwc2.c +++ b/hw/usb/hcd-dwc2.c @@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) s->fi = USB_FRMINTVL - 1; s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); - s->async_bh = qemu_bh_new(dwc2_work_bh, s); + s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, + &dev->mem_reentrancy_guard); sysbus_init_irq(sbd, &s->irq); } diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index d4da8dcb8d..c930c60921 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) } s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); - s->async_bh = qemu_bh_new(ehci_work_bh, s); + s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, + &dev->mem_reentrancy_guard); s->device = dev; s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index 8ac1175ad2..77baaa7a6b 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); } } - s->bh = qemu_bh_new(uhci_bh, s); + s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); s->num_ports_vmstate = NB_PORTS; QTAILQ_INIT(&s->queues); diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c 
index 176868d345..f500db85ab 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) static void usb_host_nodev(USBHostDevice *s) { if (!s->bh_nodev) { - s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); + s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, + &DEVICE(s)->mem_reentrancy_guard); } qemu_bh_schedule(s->bh_nodev); } @@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) USBHostDevice *dev = opaque; if (!dev->bh_postld) { - dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); + dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); } qemu_bh_schedule(dev->bh_postld); dev->bh_postld_pending = true; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index fd7df599bc..39fbaaab16 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) } } - dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); - dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); + dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); + dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, + &DEVICE(dev)->mem_reentrancy_guard); dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); packet_id_queue_init(&dev->cancelled, dev, "cancelled"); diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 66cb3f7c24..38ee660a30 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) QTAILQ_INIT(&usbif->req_free_q); QSIMPLEQ_INIT(&usbif->hotplug_q); - usbif->bh = qemu_bh_new(usbback_bh, usbif); + usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, + &DEVICE(xendev)->mem_reentrancy_guard); } static int usbback_free(struct XenLegacyDevice *xendev) diff --git 
a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index fd06fcfb3f..d004cf29d2 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -886,8 +886,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) precopy_add_notifier(&s->free_page_hint_notify); object_ref(OBJECT(s->iothread)); - s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), - virtio_ballloon_get_free_page_hints, s); + s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), + virtio_ballloon_get_free_page_hints, s, + &dev->mem_reentrancy_guard); } if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 802e1b9659..2fe804510f 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) vcrypto->vqs[i].dataq = virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); vcrypto->vqs[i].dataq_bh = - qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); + qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], + &dev->mem_reentrancy_guard); vcrypto->vqs[i].vcrypto = vcrypto; } diff --git a/include/block/aio.h b/include/block/aio.h index e267d918fd..89bbc536f9 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -23,6 +23,8 @@ #include "qemu/thread.h" #include "qemu/timer.h" #include "block/graph-lock.h" +#include "hw/qdev-core.h" + typedef struct BlockAIOCB BlockAIOCB; typedef void BlockCompletionFunc(void *opaque, int ret); @@ -323,9 +325,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, * is opaque and must be allocated prior to its use. * * @name: A human-readable identifier for debugging purposes. 
+ * @reentrancy_guard: A guard set when entering a cb to prevent + * device-reentrancy issues */ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - const char *name); + const char *name, MemReentrancyGuard *reentrancy_guard); /** * aio_bh_new: Allocate a new bottom half structure @@ -334,7 +338,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, * string. */ #define aio_bh_new(ctx, cb, opaque) \ - aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) + +/** + * aio_bh_new_guarded: Allocate a new bottom half structure with a + * reentrancy_guard + * + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name + * string. + */ +#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) /** * aio_notify: Force processing of pending events. diff --git a/include/exec/memory.h b/include/exec/memory.h index 15ade918ba..e45ce6061f 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -767,6 +767,8 @@ struct MemoryRegion { bool is_iommu; RAMBlock *ram_block; Object *owner; + /* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */ + DeviceState *dev; const MemoryRegionOps *ops; void *opaque; @@ -791,6 +793,9 @@ struct MemoryRegion { unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; RamDiscardManager *rdm; /* Only for RAM */ + + /* For devices designed to perform re-entrant IO into their own IO MRs */ + bool disable_reentrancy_guard; }; struct IOMMUMemoryRegion { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bd50ad5ee1..7623703943 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -162,6 +162,10 @@ struct NamedClockList { QLIST_ENTRY(NamedClockList) node; }; +typedef struct { + bool engaged_in_io; +} MemReentrancyGuard; + /** * DeviceState: * @realized: Indicates whether the device has been fully constructed. 
@@ -194,6 +198,9 @@ struct DeviceState { int alias_required_for_version; ResettableState reset; GSList *unplug_blockers; + + /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ + MemReentrancyGuard mem_reentrancy_guard; }; struct DeviceListener { diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index b3e54e00bc..68e70e61aa 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); /* internal interfaces */ +#define qemu_bh_new_guarded(cb, opaque, guard) \ + qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) #define qemu_bh_new(cb, opaque) \ - qemu_bh_new_full((cb), (opaque), (stringify(cb))) -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); + qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard); void qemu_bh_schedule_idle(QEMUBH *bh); enum { diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h index 802402254b..99b5cb724a 100644 --- a/include/qemu/stats64.h +++ b/include/qemu/stats64.h @@ -40,6 +40,11 @@ static inline uint64_t stat64_get(const Stat64 *s) return qatomic_read__nocheck(&s->value); } +static inline void stat64_set(Stat64 *s, uint64_t value) +{ + qatomic_set__nocheck(&s->value, value); +} + static inline void stat64_add(Stat64 *s, uint64_t value) { qatomic_add(&s->value, value); @@ -62,6 +67,7 @@ static inline void stat64_max(Stat64 *s, uint64_t value) } #else uint64_t stat64_get(const Stat64 *s); +void stat64_set(Stat64 *s, uint64_t value); bool stat64_min_slow(Stat64 *s, uint64_t value); bool stat64_max_slow(Stat64 *s, uint64_t value); bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high); diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index a6ffae0002..6624f39bc6 100644 --- a/migration/block-dirty-bitmap.c +++ 
b/migration/block-dirty-bitmap.c @@ -605,11 +605,12 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) SaveBitmapState *dbms; GHashTable *handled_by_blk = g_hash_table_new(NULL, NULL); BlockBackend *blk; - const MigrationParameters *mig_params = &migrate_get_current()->parameters; GHashTable *alias_map = NULL; + const BitmapMigrationNodeAliasList *block_bitmap_mapping = + migrate_block_bitmap_mapping(); - if (mig_params->has_block_bitmap_mapping) { - alias_map = construct_alias_map(mig_params->block_bitmap_mapping, true, + if (block_bitmap_mapping) { + alias_map = construct_alias_map(block_bitmap_mapping, true, &error_abort); } @@ -1158,7 +1159,8 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) { GHashTable *alias_map = NULL; - const MigrationParameters *mig_params = &migrate_get_current()->parameters; + const BitmapMigrationNodeAliasList *block_bitmap_mapping = + migrate_block_bitmap_mapping(); DBMLoadState *s = &((DBMState *)opaque)->load; int ret = 0; @@ -1170,8 +1172,8 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) return -EINVAL; } - if (mig_params->has_block_bitmap_mapping) { - alias_map = construct_alias_map(mig_params->block_bitmap_mapping, + if (block_bitmap_mapping) { + alias_map = construct_alias_map(block_bitmap_mapping, false, &error_abort); } diff --git a/migration/migration.c b/migration/migration.c index 22e8586623..abcadbb619 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -52,8 +52,6 @@ #include "io/channel-tls.h" #include "migration/colo.h" #include "hw/boards.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" #include "monitor/monitor.h" #include "net/announce.h" #include "qemu/queue.h" @@ -65,51 +63,6 @@ #include "sysemu/qtest.h" #include "options.h" -#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -/* Time in milliseconds we are allowed to stop 
the source, - * for sending the last part */ -#define DEFAULT_MIGRATE_SET_DOWNTIME 300 - -/* Default compression thread count */ -#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 -/* Default decompression thread count, usually decompression is at - * least 4 times as fast as compression.*/ -#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 -/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ -#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 -/* Define default autoconverge cpu throttle migration parameters */ -#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 -#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 -#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 -#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 - -/* Migration XBZRLE default cache size */ -#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) - -/* The delay time (in ms) between two COLO checkpoints */ -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) -#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 -#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE -/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ -#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 -/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ -#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 - -/* Background transfer rate for postcopy, 0 means unlimited, note - * that page requests can still exceed this limit. - */ -#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 - -/* - * Parameters for self_announce_delay giving a stream of RARP/ARP - * packets after migration. 
- */ -#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 -#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 -#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 -#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 - static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -1005,7 +958,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) if (s->state != MIGRATION_STATUS_COMPLETED) { info->ram->remaining = ram_bytes_remaining(); - info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate; + info->ram->dirty_pages_rate = + stat64_get(&ram_counters.dirty_pages_rate); } } @@ -1164,21 +1118,6 @@ void migrate_set_state(int *state, int old_state, int new_state) } } -static void migrate_set_block_incremental(MigrationState *s, bool value) -{ - s->parameters.block_incremental = value; -} - -static void block_cleanup_parameters(MigrationState *s) -{ - if (s->must_remove_block_options) { - /* setting to false can never fail */ - migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); - migrate_set_block_incremental(s, false); - s->must_remove_block_options = false; - } -} - static void migrate_fd_cleanup(MigrationState *s) { qemu_bh_delete(s->cleanup_bh); @@ -1233,7 +1172,7 @@ static void migrate_fd_cleanup(MigrationState *s) error_report_err(error_copy(s->error)); } notifier_list_notify(&migration_state_notifiers, s); - block_cleanup_parameters(s); + block_cleanup_parameters(); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } @@ -1668,7 +1607,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, } if (blk_inc) { - migrate_set_block_incremental(s, true); + migrate_set_block_incremental(true); } migrate_init(s); @@ -1727,7 +1666,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, "a valid migration protocol"); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); - block_cleanup_parameters(s); + block_cleanup_parameters(); return; } @@ -2737,7 +2676,7 @@ static void 
migration_update_counters(MigrationState *s, transferred = current_bytes - s->iteration_initial_bytes; time_spent = current_time - s->iteration_start_time; bandwidth = (double)transferred / time_spent; - s->threshold_size = bandwidth * s->parameters.downtime_limit; + s->threshold_size = bandwidth * migrate_downtime_limit(); s->mbps = (((double) transferred * 8.0) / ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; @@ -2751,8 +2690,10 @@ static void migration_update_counters(MigrationState *s, * if we haven't sent anything, we don't want to * recalculate. 10000 is a small enough number for our purposes */ - if (ram_counters.dirty_pages_rate && transferred > 10000) { - s->expected_downtime = ram_counters.remaining / bandwidth; + if (stat64_get(&ram_counters.dirty_pages_rate) && + transferred > 10000) { + s->expected_downtime = + stat64_get(&ram_counters.dirty_bytes_last_sync) / bandwidth; } qemu_file_reset_rate_limit(s->to_dst_file); @@ -3244,7 +3185,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) */ migrate_error_free(s); - s->expected_downtime = s->parameters.downtime_limit; + s->expected_downtime = migrate_downtime_limit(); if (resume) { assert(s->cleanup_bh); } else { @@ -3332,116 +3273,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) s->migration_thread_running = true; } -#define DEFINE_PROP_MIG_CAP(name, x) \ - DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - -static Property migration_properties[] = { - DEFINE_PROP_BOOL("store-global-state", MigrationState, - store_global_state, true), - DEFINE_PROP_BOOL("send-configuration", MigrationState, - send_configuration, true), - DEFINE_PROP_BOOL("send-section-footer", MigrationState, - send_section_footer, true), - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", 
MigrationState, - preempt_pre_7_2, false), - - /* Migration parameters */ - DEFINE_PROP_UINT8("x-compress-level", MigrationState, - parameters.compress_level, - DEFAULT_MIGRATE_COMPRESS_LEVEL), - DEFINE_PROP_UINT8("x-compress-threads", MigrationState, - parameters.compress_threads, - DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), - DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, - parameters.compress_wait_thread, true), - DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, - parameters.decompress_threads, - DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), - DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, - parameters.throttle_trigger_threshold, - DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), - DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, - parameters.cpu_throttle_initial, - DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), - DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, - parameters.cpu_throttle_increment, - DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), - DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, - parameters.cpu_throttle_tailslow, false), - DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, - parameters.max_bandwidth, MAX_THROTTLE), - DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, - parameters.downtime_limit, - DEFAULT_MIGRATE_SET_DOWNTIME), - DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, - parameters.x_checkpoint_delay, - DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), - DEFINE_PROP_UINT8("multifd-channels", MigrationState, - parameters.multifd_channels, - DEFAULT_MIGRATE_MULTIFD_CHANNELS), - DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, - parameters.multifd_compression, - DEFAULT_MIGRATE_MULTIFD_COMPRESSION), - DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, - parameters.multifd_zlib_level, - DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), - DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, - parameters.multifd_zstd_level, - DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), - 
DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, - parameters.xbzrle_cache_size, - DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), - DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, - parameters.max_postcopy_bandwidth, - DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), - DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, - parameters.max_cpu_throttle, - DEFAULT_MIGRATE_MAX_CPU_THROTTLE), - DEFINE_PROP_SIZE("announce-initial", MigrationState, - parameters.announce_initial, - DEFAULT_MIGRATE_ANNOUNCE_INITIAL), - DEFINE_PROP_SIZE("announce-max", MigrationState, - parameters.announce_max, - DEFAULT_MIGRATE_ANNOUNCE_MAX), - DEFINE_PROP_SIZE("announce-rounds", MigrationState, - parameters.announce_rounds, - DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), - DEFINE_PROP_SIZE("announce-step", MigrationState, - parameters.announce_step, - DEFAULT_MIGRATE_ANNOUNCE_STEP), - DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), - DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), - DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), - - /* Migration capabilities */ - DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), - DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), - DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), - DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), - DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), - DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), - DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), - DEFINE_PROP_MIG_CAP("x-postcopy-preempt", - MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), - DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), - DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), - DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), - DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), - 
DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), - DEFINE_PROP_MIG_CAP("x-background-snapshot", - MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), -#ifdef CONFIG_LINUX - DEFINE_PROP_MIG_CAP("x-zero-copy-send", - MIGRATION_CAPABILITY_ZERO_COPY_SEND), -#endif - - DEFINE_PROP_END_OF_LIST(), -}; - static void migration_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -3470,7 +3301,6 @@ static void migration_instance_finalize(Object *obj) static void migration_instance_init(Object *obj) { MigrationState *ms = MIGRATION_OBJ(obj); - MigrationParameters *params = &ms->parameters; ms->state = MIGRATION_STATUS_NONE; ms->mbps = -1; @@ -3478,33 +3308,7 @@ static void migration_instance_init(Object *obj) qemu_sem_init(&ms->pause_sem, 0); qemu_mutex_init(&ms->error_mutex); - params->tls_hostname = g_strdup(""); - params->tls_creds = g_strdup(""); - - /* Set has_* up only for parameter checks */ - params->has_compress_level = true; - params->has_compress_threads = true; - params->has_compress_wait_thread = true; - params->has_decompress_threads = true; - params->has_throttle_trigger_threshold = true; - params->has_cpu_throttle_initial = true; - params->has_cpu_throttle_increment = true; - params->has_cpu_throttle_tailslow = true; - params->has_max_bandwidth = true; - params->has_downtime_limit = true; - params->has_x_checkpoint_delay = true; - params->has_block_incremental = true; - params->has_multifd_channels = true; - params->has_multifd_compression = true; - params->has_multifd_zlib_level = true; - params->has_multifd_zstd_level = true; - params->has_xbzrle_cache_size = true; - params->has_max_postcopy_bandwidth = true; - params->has_max_cpu_throttle = true; - params->has_announce_initial = true; - params->has_announce_max = true; - params->has_announce_rounds = true; - params->has_announce_step = true; + migrate_params_init(&ms->parameters); qemu_sem_init(&ms->postcopy_pause_sem, 0); qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); 
diff --git a/migration/migration.h b/migration/migration.h index 2b71df8617..3a918514e7 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -405,6 +405,17 @@ struct MigrationState { bool preempt_pre_7_2; /* + * flush every channel after each section sent. + * + * This assures that we can't mix pages from one iteration through + * ram pages with pages for the following iteration. We really + * only need to do this flush after we have go through all the + * dirty pages. For historical reasons, we do that after each + * section. This is suboptimal (we flush too many times). + * Default value is false. (since 8.1) + */ + bool multifd_flush_after_each_section; + /* * This decides the size of guest memory chunk that will be used * to track dirty bitmap clearing. The size of memory chunk will * be GUEST_PAGE_SIZE << N. Say, N=0 means we will clear dirty diff --git a/migration/multifd.c b/migration/multifd.c index cce3ad6988..6a59c03dd2 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; + qemu_sem_wait(&multifd_send_state->channels_ready); trace_multifd_send_sync_main_wait(p->id); qemu_sem_wait(&p->sem_sync); @@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) p->num_packets = 1; while (true) { + qemu_sem_post(&multifd_send_state->channels_ready); qemu_sem_wait(&p->sem); if (qatomic_read(&multifd_send_state->exiting)) { @@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) if (flags & MULTIFD_FLAG_SYNC) { qemu_sem_post(&p->sem_sync); } - qemu_sem_post(&multifd_send_state->channels_ready); } else if (p->quit) { qemu_mutex_unlock(&p->mutex); break; diff --git a/migration/options.c b/migration/options.c index c6030587cf..53b7fc5d5d 100644 --- a/migration/options.c +++ b/migration/options.c @@ -31,29 +31,180 @@ #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 #define 
MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) +#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +/* Time in milliseconds we are allowed to stop the source, + * for sending the last part */ +#define DEFAULT_MIGRATE_SET_DOWNTIME 300 + +/* Default compression thread count */ +#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 +/* Default decompression thread count, usually decompression is at + * least 4 times as fast as compression.*/ +#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 +/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ +#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +/* Define default autoconverge cpu throttle migration parameters */ +#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 +#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 +#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 +#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 + +/* Migration XBZRLE default cache size */ +#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) + +/* The delay time (in ms) between two COLO checkpoints */ +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) +#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE +/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ +#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 +/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ +#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 + +/* Background transfer rate for postcopy, 0 means unlimited, note + * that page requests can still exceed this limit. + */ +#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 + +/* + * Parameters for self_announce_delay giving a stream of RARP/ARP + * packets after migration. 
+ */ +#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 +#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 +#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 +#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 + +#define DEFINE_PROP_MIG_CAP(name, x) \ + DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + +Property migration_properties[] = { + DEFINE_PROP_BOOL("store-global-state", MigrationState, + store_global_state, true), + DEFINE_PROP_BOOL("send-configuration", MigrationState, + send_configuration, true), + DEFINE_PROP_BOOL("send-section-footer", MigrationState, + send_section_footer, true), + DEFINE_PROP_BOOL("decompress-error-check", MigrationState, + decompress_error_check, true), + DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, + multifd_flush_after_each_section, false), + DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, + clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), + DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, + preempt_pre_7_2, false), + + /* Migration parameters */ + DEFINE_PROP_UINT8("x-compress-level", MigrationState, + parameters.compress_level, + DEFAULT_MIGRATE_COMPRESS_LEVEL), + DEFINE_PROP_UINT8("x-compress-threads", MigrationState, + parameters.compress_threads, + DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), + DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, + parameters.compress_wait_thread, true), + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), + DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, + parameters.throttle_trigger_threshold, + DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), + DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, + parameters.cpu_throttle_initial, + DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), + DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, + parameters.cpu_throttle_increment, + DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), + DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", 
MigrationState, + parameters.cpu_throttle_tailslow, false), + DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, + parameters.max_bandwidth, MAX_THROTTLE), + DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, + parameters.downtime_limit, + DEFAULT_MIGRATE_SET_DOWNTIME), + DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, + parameters.x_checkpoint_delay, + DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), + DEFINE_PROP_UINT8("multifd-channels", MigrationState, + parameters.multifd_channels, + DEFAULT_MIGRATE_MULTIFD_CHANNELS), + DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, + parameters.multifd_compression, + DEFAULT_MIGRATE_MULTIFD_COMPRESSION), + DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, + parameters.multifd_zlib_level, + DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), + DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, + parameters.max_postcopy_bandwidth, + DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), + DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, + parameters.max_cpu_throttle, + DEFAULT_MIGRATE_MAX_CPU_THROTTLE), + DEFINE_PROP_SIZE("announce-initial", MigrationState, + parameters.announce_initial, + DEFAULT_MIGRATE_ANNOUNCE_INITIAL), + DEFINE_PROP_SIZE("announce-max", MigrationState, + parameters.announce_max, + DEFAULT_MIGRATE_ANNOUNCE_MAX), + DEFINE_PROP_SIZE("announce-rounds", MigrationState, + parameters.announce_rounds, + DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), + DEFINE_PROP_SIZE("announce-step", MigrationState, + parameters.announce_step, + DEFAULT_MIGRATE_ANNOUNCE_STEP), + DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), + DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), + DEFINE_PROP_STRING("tls-authz", MigrationState, 
parameters.tls_authz), + + /* Migration capabilities */ + DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), + DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), + DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), + DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), + DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), + DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), + DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), + DEFINE_PROP_MIG_CAP("x-postcopy-preempt", + MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), + DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), + DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), + DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), + DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), +#ifdef CONFIG_LINUX + DEFINE_PROP_MIG_CAP("x-zero-copy-send", + MIGRATION_CAPABILITY_ZERO_COPY_SEND), +#endif + + DEFINE_PROP_END_OF_LIST(), +}; + bool migrate_auto_converge(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; } bool migrate_background_snapshot(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; } bool migrate_block(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; } @@ -61,95 +212,76 @@ bool migrate_block(void) bool migrate_colo(void) { MigrationState *s = migrate_get_current(); + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; } bool migrate_compress(void) { 
- MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; } bool migrate_dirty_bitmaps(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; } bool migrate_events(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; } bool migrate_ignore_shared(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; } bool migrate_late_block_activate(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; } bool migrate_multifd(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; } bool migrate_pause_before_switchover(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; } bool migrate_postcopy_blocktime(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; } bool migrate_postcopy_preempt(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; } bool migrate_postcopy_ram(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; } @@ -163,60 +295,55 @@ bool migrate_rdma_pin_all(void) bool 
migrate_release_ram(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; } bool migrate_return_path(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; } bool migrate_validate_uuid(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; } bool migrate_xbzrle(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; } bool migrate_zero_blocks(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; } bool migrate_zero_copy_send(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; } /* pseudo capabilities */ +bool migrate_multifd_flush_after_each_section(void) +{ + MigrationState *s = migrate_get_current(); + + return s->multifd_flush_after_each_section; +} + bool migrate_postcopy(void) { return migrate_postcopy_ram() || migrate_dirty_bitmaps(); @@ -224,9 +351,7 @@ bool migrate_postcopy(void) bool migrate_tls(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.tls_creds && *s->parameters.tls_creds; } @@ -494,128 +619,114 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, /* parameters */ -bool migrate_block_incremental(void) +const BitmapMigrationNodeAliasList *migrate_block_bitmap_mapping(void) { - MigrationState *s; + MigrationState *s = migrate_get_current(); - s = migrate_get_current(); + return 
s->parameters.block_bitmap_mapping; +} + +bool migrate_block_incremental(void) +{ + MigrationState *s = migrate_get_current(); return s->parameters.block_incremental; } uint32_t migrate_checkpoint_delay(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.x_checkpoint_delay; } int migrate_compress_level(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.compress_level; } int migrate_compress_threads(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.compress_threads; } int migrate_compress_wait_thread(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.compress_wait_thread; } uint8_t migrate_cpu_throttle_increment(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.cpu_throttle_increment; } uint8_t migrate_cpu_throttle_initial(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.cpu_throttle_initial; } bool migrate_cpu_throttle_tailslow(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.cpu_throttle_tailslow; } int migrate_decompress_threads(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.decompress_threads; } -uint8_t migrate_max_cpu_throttle(void) +uint64_t migrate_downtime_limit(void) { - MigrationState *s; + MigrationState *s = migrate_get_current(); - s = migrate_get_current(); + return s->parameters.downtime_limit; +} + +uint8_t migrate_max_cpu_throttle(void) +{ + MigrationState *s = migrate_get_current(); return s->parameters.max_cpu_throttle; } uint64_t 
migrate_max_bandwidth(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.max_bandwidth; } int64_t migrate_max_postcopy_bandwidth(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.max_postcopy_bandwidth; } int migrate_multifd_channels(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.multifd_channels; } MultiFDCompression migrate_multifd_compression(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); return s->parameters.multifd_compression; @@ -623,42 +734,76 @@ MultiFDCompression migrate_multifd_compression(void) int migrate_multifd_zlib_level(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.multifd_zlib_level; } int migrate_multifd_zstd_level(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.multifd_zstd_level; } uint8_t migrate_throttle_trigger_threshold(void) { - MigrationState *s; - - s = migrate_get_current(); + MigrationState *s = migrate_get_current(); return s->parameters.throttle_trigger_threshold; } -uint64_t migrate_xbzrle_cache_size(void) +const char *migrate_tls_authz(void) +{ + MigrationState *s = migrate_get_current(); + + return s->parameters.tls_authz; +} + +const char *migrate_tls_creds(void) { - MigrationState *s; + MigrationState *s = migrate_get_current(); - s = migrate_get_current(); + return s->parameters.tls_creds; +} + +const char *migrate_tls_hostname(void) +{ + MigrationState *s = migrate_get_current(); + + return s->parameters.tls_hostname; +} + +uint64_t migrate_xbzrle_cache_size(void) +{ + MigrationState *s = migrate_get_current(); return 
s->parameters.xbzrle_cache_size; } +/* parameter setters */ + +void migrate_set_block_incremental(bool value) +{ + MigrationState *s = migrate_get_current(); + + s->parameters.block_incremental = value; +} + /* parameters helpers */ +void block_cleanup_parameters(void) +{ + MigrationState *s = migrate_get_current(); + + if (s->must_remove_block_options) { + /* setting to false can never fail */ + migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); + migrate_set_block_incremental(false); + s->must_remove_block_options = false; + } +} + AnnounceParameters *migrate_announce_params(void) { static AnnounceParameters ap; @@ -741,6 +886,37 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) return params; } +void migrate_params_init(MigrationParameters *params) +{ + params->tls_hostname = g_strdup(""); + params->tls_creds = g_strdup(""); + + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; + params->has_compress_wait_thread = true; + params->has_decompress_threads = true; + params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; + params->has_cpu_throttle_increment = true; + params->has_cpu_throttle_tailslow = true; + params->has_max_bandwidth = true; + params->has_downtime_limit = true; + params->has_x_checkpoint_delay = true; + params->has_block_incremental = true; + params->has_multifd_channels = true; + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; + params->has_announce_initial = true; + params->has_announce_max = true; + params->has_announce_rounds = true; + params->has_announce_step = true; +} + /* * Check whether the parameters are valid. Error will be put into errp * (if provided). Return true if valid, otherwise false. 
diff --git a/migration/options.h b/migration/options.h index 89067e59a0..3c322867cd 100644 --- a/migration/options.h +++ b/migration/options.h @@ -14,6 +14,9 @@ #ifndef QEMU_MIGRATION_OPTIONS_H #define QEMU_MIGRATION_OPTIONS_H +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" + /* constants */ /* Amount of time to allocate to each "chunk" of bandwidth-throttled @@ -21,6 +24,10 @@ #define BUFFER_DELAY 100 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) +/* migration properties */ + +extern Property migration_properties[]; + /* capabilities */ bool migrate_auto_converge(void); @@ -52,6 +59,7 @@ bool migrate_zero_copy_send(void); * check, but they are not a capability. */ +bool migrate_multifd_flush_after_each_section(void); bool migrate_postcopy(void); bool migrate_tls(void); @@ -62,6 +70,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); /* parameters */ +const BitmapMigrationNodeAliasList *migrate_block_bitmap_mapping(void); bool migrate_block_incremental(void); uint32_t migrate_checkpoint_delay(void); int migrate_compress_level(void); @@ -71,6 +80,7 @@ uint8_t migrate_cpu_throttle_increment(void); uint8_t migrate_cpu_throttle_initial(void); bool migrate_cpu_throttle_tailslow(void); int migrate_decompress_threads(void); +uint64_t migrate_downtime_limit(void); uint8_t migrate_max_cpu_throttle(void); uint64_t migrate_max_bandwidth(void); int64_t migrate_max_postcopy_bandwidth(void); @@ -79,10 +89,19 @@ MultiFDCompression migrate_multifd_compression(void); int migrate_multifd_zlib_level(void); int migrate_multifd_zstd_level(void); uint8_t migrate_throttle_trigger_threshold(void); +const char *migrate_tls_authz(void); +const char *migrate_tls_creds(void); +const char *migrate_tls_hostname(void); uint64_t migrate_xbzrle_cache_size(void); +/* parameters setters */ + +void migrate_set_block_incremental(bool value); + /* parameters helpers */ bool migrate_params_check(MigrationParameters *params, Error **errp); +void 
migrate_params_init(MigrationParameters *params); +void block_cleanup_parameters(void); #endif diff --git a/migration/ram.c b/migration/ram.c index 01356f60a4..89be3e3320 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -86,6 +86,7 @@ #define RAM_SAVE_FLAG_XBZRLE 0x40 /* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 +#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 /* We can't use any flag that is bigger than 0x200 */ int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, @@ -1129,8 +1130,9 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) double compressed_size; /* calculate period counters */ - ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000 - / (end_time - rs->time_last_bitmap_sync); + stat64_set(&ram_counters.dirty_pages_rate, + rs->num_dirty_pages_period * 1000 / + (end_time - rs->time_last_bitmap_sync)); if (!page_count) { return; @@ -1222,7 +1224,7 @@ static void migration_bitmap_sync(RAMState *rs) RAMBLOCK_FOREACH_NOT_IGNORED(block) { ramblock_sync_dirty_bitmap(rs, block); } - ram_counters.remaining = ram_bytes_remaining(); + stat64_set(&ram_counters.dirty_bytes_last_sync, ram_bytes_remaining()); } qemu_mutex_unlock(&rs->bitmap_mutex); @@ -1581,6 +1583,7 @@ retry: * associated with the search process. 
* * Returns: + * <0: An error happened * PAGE_ALL_CLEAN: no dirty page found, give up * PAGE_TRY_AGAIN: no dirty page found, retry for next block * PAGE_DIRTY_FOUND: dirty page found @@ -1608,6 +1611,15 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { + if (!migrate_multifd_flush_after_each_section()) { + QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; + int ret = multifd_send_sync_main(f); + if (ret < 0) { + return ret; + } + qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); + qemu_fflush(f); + } /* * If memory migration starts over, we will meet a dirtied page * which may still exists in compression threads's ring, so we @@ -2600,6 +2612,9 @@ static int ram_find_and_save_block(RAMState *rs) break; } else if (res == PAGE_TRY_AGAIN) { continue; + } else if (res < 0) { + pages = res; + break; } } } @@ -3286,6 +3301,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return ret; } + if (!migrate_multifd_flush_after_each_section()) { + qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); + } + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); @@ -3394,9 +3413,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { - ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); - if (ret < 0) { - return ret; + if (migrate_multifd_flush_after_each_section()) { + ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); + if (ret < 0) { + return ret; + } } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); @@ -3469,6 +3490,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return ret; } + if (!migrate_multifd_flush_after_each_section()) { + qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); + } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); @@ -4150,10 +4174,14 @@ int ram_load_postcopy(QEMUFile *f, int channel) } decompress_data_with_multi_threads(f, 
page_buffer, len); break; - + case RAM_SAVE_FLAG_MULTIFD_FLUSH: + multifd_recv_sync_main(); + break; case RAM_SAVE_FLAG_EOS: /* normal exit */ - multifd_recv_sync_main(); + if (migrate_multifd_flush_after_each_section()) { + multifd_recv_sync_main(); + } break; default: error_report("Unknown combination of migration flags: 0x%x" @@ -4422,9 +4450,14 @@ static int ram_load_precopy(QEMUFile *f) break; } break; + case RAM_SAVE_FLAG_MULTIFD_FLUSH: + multifd_recv_sync_main(); + break; case RAM_SAVE_FLAG_EOS: /* normal exit */ - multifd_recv_sync_main(); + if (migrate_multifd_flush_after_each_section()) { + multifd_recv_sync_main(); + } break; default: if (flags & RAM_SAVE_FLAG_HOOK) { diff --git a/migration/ram.h b/migration/ram.h index a6e0d70226..04b05e1b2c 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -41,7 +41,8 @@ * one thread). */ typedef struct { - int64_t dirty_pages_rate; + Stat64 dirty_bytes_last_sync; + Stat64 dirty_pages_rate; Stat64 dirty_sync_count; Stat64 dirty_sync_missed_zero_copy; Stat64 downtime_bytes; @@ -51,7 +52,6 @@ typedef struct { Stat64 postcopy_bytes; Stat64 postcopy_requests; Stat64 precopy_bytes; - int64_t remaining; Stat64 transferred; } RAMStats; diff --git a/migration/tls.c b/migration/tls.c index acd38e0b62..cd29177957 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -34,20 +34,19 @@ migration_tls_get_creds(MigrationState *s, Error **errp) { Object *creds; + const char *tls_creds = migrate_tls_creds(); QCryptoTLSCreds *ret; - creds = object_resolve_path_component( - object_get_objects_root(), s->parameters.tls_creds); + creds = object_resolve_path_component(object_get_objects_root(), tls_creds); if (!creds) { - error_setg(errp, "No TLS credentials with id '%s'", - s->parameters.tls_creds); + error_setg(errp, "No TLS credentials with id '%s'", tls_creds); return NULL; } ret = (QCryptoTLSCreds *)object_dynamic_cast( creds, TYPE_QCRYPTO_TLS_CREDS); if (!ret) { error_setg(errp, "Object with id '%s' is not TLS credentials", - 
s->parameters.tls_creds); + tls_creds); return NULL; } if (!qcrypto_tls_creds_check_endpoint(ret, endpoint, errp)) { @@ -87,10 +86,7 @@ void migration_tls_channel_process_incoming(MigrationState *s, return; } - tioc = qio_channel_tls_new_server( - ioc, creds, - s->parameters.tls_authz, - errp); + tioc = qio_channel_tls_new_server(ioc, creds, migrate_tls_authz(), errp); if (!tioc) { return; } @@ -134,8 +130,9 @@ QIOChannelTLS *migration_tls_client_create(MigrationState *s, return NULL; } - if (s->parameters.tls_hostname && *s->parameters.tls_hostname) { - hostname = s->parameters.tls_hostname; + const char *tls_hostname = migrate_tls_hostname(); + if (tls_hostname && *tls_hostname) { + hostname = tls_hostname; } return qio_channel_tls_new_client(ioc, creds, hostname, errp); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d768171dcf..eeaec436eb 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2865,6 +2865,14 @@ sub process { if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); } +# recommend qemu_bh_new_guarded instead of qemu_bh_new + if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { + ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); + } +# recommend aio_bh_new_guarded instead of aio_bh_new + if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { + ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); + } # check for module_init(), use category-specific init macros explicitly please if ($line =~ /^module_init\s*\(/) { ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . 
$herecurr); diff --git a/softmmu/memory.c b/softmmu/memory.c index b1a6cae6f5..b7b3386e9d 100644 --- a/softmmu/memory.c +++ b/softmmu/memory.c @@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_size_max = 4; } + /* Do not allow more than one simultaneous access to a device's IO Regions */ + if (mr->dev && !mr->disable_reentrancy_guard && + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { + if (mr->dev->mem_reentrancy_guard.engaged_in_io) { + warn_report_once("Blocked re-entrant IO on MemoryRegion: " + "%s at addr: 0x%" HWADDR_PRIX, + memory_region_name(mr), addr); + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; + } + /* FIXME: support unaligned access? */ access_size = MAX(MIN(size, access_size_max), access_size_min); access_mask = MAKE_64BIT_MASK(0, access_size * 8); @@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, access_mask, attrs); } } + if (mr->dev) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } return r; } @@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, } mr->name = g_strdup(name); mr->owner = owner; + mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); mr->ram_block = NULL; if (name) { diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c index 0cb69395b4..6fbfd41bc8 100644 --- a/target/s390x/gdbstub.c +++ b/target/s390x/gdbstub.c @@ -206,12 +206,8 @@ static int cpu_write_c_reg(CPUS390XState *env, uint8_t *mem_buf, int n) #define S390_VIRT_CPUTM_REGNUM 1 #define S390_VIRT_BEA_REGNUM 2 #define S390_VIRT_PREFIX_REGNUM 3 -#define S390_VIRT_PP_REGNUM 4 -#define S390_VIRT_PFT_REGNUM 5 -#define S390_VIRT_PFS_REGNUM 6 -#define S390_VIRT_PFC_REGNUM 7 /* total number of registers in s390-virt.xml */ -#define S390_NUM_VIRT_REGS 8 +#define S390_NUM_VIRT_REGS 4 static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n) { @@ -224,14 +220,6 @@ static int 
cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n) return gdb_get_regl(mem_buf, env->gbea); case S390_VIRT_PREFIX_REGNUM: return gdb_get_regl(mem_buf, env->psa); - case S390_VIRT_PP_REGNUM: - return gdb_get_regl(mem_buf, env->pp); - case S390_VIRT_PFT_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_token); - case S390_VIRT_PFS_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_select); - case S390_VIRT_PFC_REGNUM: - return gdb_get_regl(mem_buf, env->pfault_compare); default: return 0; } @@ -256,19 +244,51 @@ static int cpu_write_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n) env->psa = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PP_REGNUM: + default: + return 0; + } +} + +/* the values represent the positions in s390-virt-kvm.xml */ +#define S390_VIRT_KVM_PP_REGNUM 0 +#define S390_VIRT_KVM_PFT_REGNUM 1 +#define S390_VIRT_KVM_PFS_REGNUM 2 +#define S390_VIRT_KVM_PFC_REGNUM 3 +/* total number of registers in s390-virt-kvm.xml */ +#define S390_NUM_VIRT_KVM_REGS 4 + +static int cpu_read_virt_kvm_reg(CPUS390XState *env, GByteArray *mem_buf, int n) +{ + switch (n) { + case S390_VIRT_KVM_PP_REGNUM: + return gdb_get_regl(mem_buf, env->pp); + case S390_VIRT_KVM_PFT_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_token); + case S390_VIRT_KVM_PFS_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_select); + case S390_VIRT_KVM_PFC_REGNUM: + return gdb_get_regl(mem_buf, env->pfault_compare); + default: + return 0; + } +} + +static int cpu_write_virt_kvm_reg(CPUS390XState *env, uint8_t *mem_buf, int n) +{ + switch (n) { + case S390_VIRT_KVM_PP_REGNUM: env->pp = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFT_REGNUM: + case S390_VIRT_KVM_PFT_REGNUM: env->pfault_token = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFS_REGNUM: + case S390_VIRT_KVM_PFS_REGNUM: env->pfault_select = ldtul_p(mem_buf); 
cpu_synchronize_post_init(env_cpu(env)); return 8; - case S390_VIRT_PFC_REGNUM: + case S390_VIRT_KVM_PFC_REGNUM: env->pfault_compare = ldtul_p(mem_buf); cpu_synchronize_post_init(env_cpu(env)); return 8; @@ -321,10 +341,15 @@ void s390_cpu_gdb_init(CPUState *cs) cpu_write_c_reg, S390_NUM_C_REGS, "s390-cr.xml", 0); + gdb_register_coprocessor(cs, cpu_read_virt_reg, + cpu_write_virt_reg, + S390_NUM_VIRT_REGS, "s390-virt.xml", 0); + if (kvm_enabled()) { - gdb_register_coprocessor(cs, cpu_read_virt_reg, - cpu_write_virt_reg, - S390_NUM_VIRT_REGS, "s390-virt.xml", 0); + gdb_register_coprocessor(cs, cpu_read_virt_kvm_reg, + cpu_write_virt_kvm_reg, + S390_NUM_VIRT_KVM_REGS, "s390-virt-kvm.xml", + 0); } #endif } diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index bf9f7c4248..e4f95b2858 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -351,7 +351,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) if (size != msg.size) { qos_printf("%s: Wrong message size received %d != %d\n", __func__, size, msg.size); - return; + goto out; } } @@ -509,6 +509,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) break; } +out: g_mutex_unlock(&s->data_mutex); } diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c index f2bfcede93..8c9407c560 100644 --- a/tests/unit/ptimer-test-stubs.c +++ b/tests/unit/ptimer-test-stubs.c @@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) return deadline; } -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard) { QEMUBH *bh = g_new(QEMUBH, 1); diff --git a/util/async.c b/util/async.c index 21016a1ac7..a9b528c370 100644 --- a/util/async.c +++ b/util/async.c @@ -65,6 +65,7 @@ struct QEMUBH { void *opaque; QSLIST_ENTRY(QEMUBH) next; unsigned flags; + MemReentrancyGuard 
*reentrancy_guard; }; /* Called concurrently from any thread */ @@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, } QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - const char *name) + const char *name, MemReentrancyGuard *reentrancy_guard) { QEMUBH *bh; bh = g_new(QEMUBH, 1); @@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, .cb = cb, .opaque = opaque, .name = name, + .reentrancy_guard = reentrancy_guard, }; return bh; } void aio_bh_call(QEMUBH *bh) { + bool last_engaged_in_io = false; + + if (bh->reentrancy_guard) { + last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; + if (bh->reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } + bh->reentrancy_guard->engaged_in_io = true; + } + bh->cb(bh->opaque); + + if (bh->reentrancy_guard) { + bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; + } } /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ diff --git a/util/main-loop.c b/util/main-loop.c index e180c85145..7022f02ef8 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) /* Functions to operate on the main QEMU AioContext. 
*/ -QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, + MemReentrancyGuard *reentrancy_guard) { - return aio_bh_new_full(qemu_aio_context, cb, opaque, name); + return aio_bh_new_full(qemu_aio_context, cb, opaque, name, + reentrancy_guard); } /* diff --git a/util/stats64.c b/util/stats64.c index 897613c949..09736014ec 100644 --- a/util/stats64.c +++ b/util/stats64.c @@ -57,6 +57,17 @@ uint64_t stat64_get(const Stat64 *s) return ((uint64_t)high << 32) | low; } +void stat64_set(Stat64 *s, uint64_t val) +{ + while (!stat64_wrtrylock(s)) { + cpu_relax(); + } + + qatomic_set(&s->high, val >> 32); + qatomic_set(&s->low, val); + stat64_wrunlock(s); +} + bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high) { uint32_t old; diff --git a/util/trace-events b/util/trace-events index 16f78d8fe5..3f7e766683 100644 --- a/util/trace-events +++ b/util/trace-events @@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" # async.c aio_co_schedule(void *ctx, void *co) "ctx %p co %p" aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" +reentrant_aio(void *ctx, const char *name) "ctx %p name %s" # thread-pool.c thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" |