From 76936bbc313190f8e4ea5e33f793fd00bf49b3f0 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:00 +0100 Subject: migration: Always report an error in ram_save_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will prepare ground for future changes adding an Error** argument to the save_setup() handler. We need to make sure that on failure, ram_save_setup() sets a new error. Reviewed-by: Fabiano Rosas Signed-off-by: Cédric Le Goater Link: https://lore.kernel.org/r/20240320064911.545001-5-clg@redhat.com Signed-off-by: Peter Xu --- migration/ram.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'migration/ram.c') diff --git a/migration/ram.c b/migration/ram.c index 8deb84984f..44d7073730 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3074,12 +3074,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque) int ret, max_hg_page_size; if (compress_threads_save_setup()) { + error_report("%s: failed to start compress threads", __func__); return -1; } /* migration has already setup the bitmap, reuse it. 
*/ if (!migration_in_colo_state()) { if (ram_init_all(rsp) != 0) { + error_report("%s: failed to setup RAM for migration", __func__); compress_threads_save_cleanup(); return -1; } @@ -3116,12 +3118,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ret = rdma_registration_start(f, RAM_CONTROL_SETUP); if (ret < 0) { + error_report("%s: failed to start RDMA registration", __func__); qemu_file_set_error(f, ret); return ret; } ret = rdma_registration_stop(f, RAM_CONTROL_SETUP); if (ret < 0) { + error_report("%s: failed to stop RDMA registration", __func__); qemu_file_set_error(f, ret); return ret; } @@ -3138,6 +3142,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ret = multifd_send_sync_main(); bql_lock(); if (ret < 0) { + error_report("%s: multifd synchronization failed", __func__); return ret; } @@ -3147,7 +3152,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - return qemu_fflush(f); + ret = qemu_fflush(f); + if (ret < 0) { + error_report("%s failed : %s", __func__, strerror(-ret)); + } + return ret; } static void ram_save_file_bmap(QEMUFile *f) -- cgit 1.4.1 From 01c3ac681bd6709d2bf6a7d9591c40a394e39536 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:03 +0100 Subject: migration: Add Error** argument to .save_setup() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose is to record a potential error in the migration stream if qemu_savevm_state_setup() fails. Most of the current .save_setup() handlers can be modified to use the Error argument instead of managing their own and calling locally error_report(). 
Cc: Nicholas Piggin Cc: Harsh Prateek Bora Cc: Halil Pasic Cc: Thomas Huth Cc: Eric Blake Cc: Vladimir Sementsov-Ogievskiy Cc: John Snow Cc: Stefan Hajnoczi Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Thomas Huth Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Cédric Le Goater Link: https://lore.kernel.org/r/20240320064911.545001-8-clg@redhat.com Signed-off-by: Peter Xu --- hw/ppc/spapr.c | 2 +- hw/s390x/s390-stattrib.c | 6 ++---- hw/vfio/migration.c | 17 ++++++++--------- include/migration/register.h | 3 ++- migration/block-dirty-bitmap.c | 4 +++- migration/block.c | 13 ++++--------- migration/ram.c | 15 ++++++++------- migration/savevm.c | 4 +--- 8 files changed, 29 insertions(+), 35 deletions(-) (limited to 'migration/ram.c') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e9bc97fee0..823164e81c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2172,7 +2172,7 @@ static const VMStateDescription vmstate_spapr = { } }; -static int htab_save_setup(QEMUFile *f, void *opaque) +static int htab_save_setup(QEMUFile *f, void *opaque, Error **errp) { SpaprMachineState *spapr = opaque; diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index b743e8a2fe..bc04187b2b 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -168,19 +168,17 @@ static int cmma_load(QEMUFile *f, void *opaque, int version_id) return ret; } -static int cmma_save_setup(QEMUFile *f, void *opaque) +static int cmma_save_setup(QEMUFile *f, void *opaque, Error **errp) { S390StAttribState *sas = S390_STATTRIB(opaque); S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas); - Error *local_err = NULL; int res; /* * Signal that we want to start a migration, thus needing PGSTE dirty * tracking. 
*/ - res = sac->set_migrationmode(sas, true, &local_err); + res = sac->set_migrationmode(sas, true, errp); if (res) { - error_report_err(local_err); return res; } qemu_put_be64(f, STATTR_FLAG_EOS); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index bf5a29ddc1..5763c0b683 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -376,7 +376,7 @@ static int vfio_save_prepare(void *opaque, Error **errp) return 0; } -static int vfio_save_setup(QEMUFile *f, void *opaque) +static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) { VFIODevice *vbasedev = opaque; VFIOMigration *migration = vbasedev->migration; @@ -390,8 +390,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) stop_copy_size); migration->data_buffer = g_try_malloc0(migration->data_buffer_size); if (!migration->data_buffer) { - error_report("%s: Failed to allocate migration data buffer", - vbasedev->name); + error_setg(errp, "%s: Failed to allocate migration data buffer", + vbasedev->name); return -ENOMEM; } @@ -401,8 +401,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, VFIO_DEVICE_STATE_RUNNING); if (ret) { - error_report("%s: Failed to set new PRE_COPY state", - vbasedev->name); + error_setg(errp, "%s: Failed to set new PRE_COPY state", + vbasedev->name); return ret; } @@ -413,8 +413,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) /* vfio_save_complete_precopy() will go to STOP_COPY */ break; default: - error_report("%s: Invalid device state %d", vbasedev->name, - migration->device_state); + error_setg(errp, "%s: Invalid device state %d", vbasedev->name, + migration->device_state); return -EINVAL; } } @@ -425,8 +425,7 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) ret = qemu_file_get_error(f); if (ret < 0) { - error_report("%s: save setup failed : %s", vbasedev->name, - strerror(-ret)); + error_setg_errno(errp, -ret, "%s: save setup failed", vbasedev->name); } return ret; 
diff --git a/include/migration/register.h b/include/migration/register.h index d7b70a8be6..64fc7c1103 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -60,10 +60,11 @@ typedef struct SaveVMHandlers { * * @f: QEMUFile where to send the data * @opaque: data pointer passed to register_savevm_live() + * @errp: pointer to Error*, to store an error if it happens. * * Returns zero to indicate success and negative for error */ - int (*save_setup)(QEMUFile *f, void *opaque); + int (*save_setup)(QEMUFile *f, void *opaque, Error **errp); /** * @save_cleanup diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 2708abf3d7..542a8c297b 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -1213,12 +1213,14 @@ fail: return ret; } -static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque) +static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque, Error **errp) { DBMSaveState *s = &((DBMState *)opaque)->save; SaveBitmapState *dbms = NULL; if (init_dirty_bitmap_migration(s) < 0) { + error_setg(errp, + "Failed to initialize dirty tracking bitmap for blocks"); return -1; } diff --git a/migration/block.c b/migration/block.c index f8a11beb37..bae6e94891 100644 --- a/migration/block.c +++ b/migration/block.c @@ -711,10 +711,9 @@ static void block_migration_cleanup(void *opaque) blk_mig_unlock(); } -static int block_save_setup(QEMUFile *f, void *opaque) +static int block_save_setup(QEMUFile *f, void *opaque, Error **errp) { int ret; - Error *local_err = NULL; trace_migration_block_save("setup", block_mig_state.submitted, block_mig_state.transferred); @@ -722,25 +721,21 @@ static int block_save_setup(QEMUFile *f, void *opaque) warn_report("block migration is deprecated;" " use blockdev-mirror with NBD instead"); - ret = init_blk_migration(f, &local_err); + ret = init_blk_migration(f, errp); if (ret < 0) { - error_report_err(local_err); return ret; } /* start track dirty blocks */ ret = 
set_dirty_tracking(); if (ret) { - error_setg_errno(&local_err, -ret, - "Failed to start block dirty tracking"); - error_report_err(local_err); + error_setg_errno(errp, -ret, "Failed to start block dirty tracking"); return ret; } ret = flush_blks(f); if (ret) { - error_setg_errno(&local_err, -ret, "Flushing block failed"); - error_report_err(local_err); + error_setg_errno(errp, -ret, "Flushing block failed"); return ret; } blk_mig_reset_dirty_cursor(); diff --git a/migration/ram.c b/migration/ram.c index 44d7073730..6ea5a06e00 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3066,22 +3066,23 @@ static bool mapped_ram_read_header(QEMUFile *file, MappedRamHeader *header, * * @f: QEMUFile where to send the data * @opaque: RAMState pointer + * @errp: pointer to Error*, to store an error if it happens. */ -static int ram_save_setup(QEMUFile *f, void *opaque) +static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp) { RAMState **rsp = opaque; RAMBlock *block; int ret, max_hg_page_size; if (compress_threads_save_setup()) { - error_report("%s: failed to start compress threads", __func__); + error_setg(errp, "%s: failed to start compress threads", __func__); return -1; } /* migration has already setup the bitmap, reuse it. 
*/ if (!migration_in_colo_state()) { if (ram_init_all(rsp) != 0) { - error_report("%s: failed to setup RAM for migration", __func__); + error_setg(errp, "%s: failed to setup RAM for migration", __func__); compress_threads_save_cleanup(); return -1; } @@ -3118,14 +3119,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ret = rdma_registration_start(f, RAM_CONTROL_SETUP); if (ret < 0) { - error_report("%s: failed to start RDMA registration", __func__); + error_setg(errp, "%s: failed to start RDMA registration", __func__); qemu_file_set_error(f, ret); return ret; } ret = rdma_registration_stop(f, RAM_CONTROL_SETUP); if (ret < 0) { - error_report("%s: failed to stop RDMA registration", __func__); + error_setg(errp, "%s: failed to stop RDMA registration", __func__); qemu_file_set_error(f, ret); return ret; } @@ -3142,7 +3143,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ret = multifd_send_sync_main(); bql_lock(); if (ret < 0) { - error_report("%s: multifd synchronization failed", __func__); + error_setg(errp, "%s: multifd synchronization failed", __func__); return ret; } @@ -3154,7 +3155,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ret = qemu_fflush(f); if (ret < 0) { - error_report("%s failed : %s", __func__, strerror(-ret)); + error_setg_errno(errp, -ret, "%s failed", __func__); } return ret; } diff --git a/migration/savevm.c b/migration/savevm.c index 327e9b346e..a2679ba0b8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1342,11 +1342,9 @@ int qemu_savevm_state_setup(QEMUFile *f, Error **errp) } save_section_header(f, se, QEMU_VM_SECTION_START); - ret = se->ops->save_setup(f, se->opaque); + ret = se->ops->save_setup(f, se->opaque, errp); save_section_footer(f, se); if (ret < 0) { - error_setg(errp, "failed to setup SaveStateEntry with id(name): " - "%d(%s): %d", se->section_id, se->idstr, ret); qemu_file_set_error(f, ret); break; } -- cgit 1.4.1 From 
e4fa064d5610a96e50b49c1ea34c98ef12d0034a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:04 +0100 Subject: migration: Add Error** argument to .load_setup() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be useful to report errors at a higher level, mostly in VFIO today. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Signed-off-by: Cédric Le Goater Link: https://lore.kernel.org/r/20240320064911.545001-9-clg@redhat.com [peterx: drop comment for ERRP_GUARD, per Markus] Signed-off-by: Peter Xu --- hw/vfio/migration.c | 9 +++++++-- include/migration/register.h | 3 ++- migration/ram.c | 3 ++- migration/savevm.c | 11 +++++++---- 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'migration/ram.c') diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 5763c0b683..06ae40969b 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -588,12 +588,17 @@ static void vfio_save_state(QEMUFile *f, void *opaque) } } -static int vfio_load_setup(QEMUFile *f, void *opaque) +static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp) { VFIODevice *vbasedev = opaque; + int ret; - return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, vbasedev->migration->device_state); + if (ret) { + error_setg(errp, "%s: Failed to set RESUMING state", vbasedev->name); + } + return ret; } static int vfio_load_cleanup(void *opaque) diff --git a/include/migration/register.h b/include/migration/register.h index 64fc7c1103..f60e797894 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -234,10 +234,11 @@ typedef struct SaveVMHandlers { * * @f: QEMUFile where to receive the data * @opaque: data pointer passed to register_savevm_live() + * @errp: pointer to Error*, to store an error if it happens. 
* * Returns zero to indicate success and negative for error */ - int (*load_setup)(QEMUFile *f, void *opaque); + int (*load_setup)(QEMUFile *f, void *opaque, Error **errp); /** * @load_cleanup diff --git a/migration/ram.c b/migration/ram.c index 6ea5a06e00..4cd4f0158c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3704,8 +3704,9 @@ void colo_release_ram_cache(void) * * @f: QEMUFile where to receive the data * @opaque: RAMState pointer + * @errp: pointer to Error*, to store an error if it happens. */ -static int ram_load_setup(QEMUFile *f, void *opaque) +static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp) { xbzrle_load_setup(); ramblock_recv_map_init(); diff --git a/migration/savevm.c b/migration/savevm.c index a2679ba0b8..5d200cf42a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2768,8 +2768,9 @@ static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); } -static int qemu_loadvm_state_setup(QEMUFile *f) +static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp) { + ERRP_GUARD(); SaveStateEntry *se; int ret; @@ -2784,10 +2785,11 @@ static int qemu_loadvm_state_setup(QEMUFile *f) } } - ret = se->ops->load_setup(f, se->opaque); + ret = se->ops->load_setup(f, se->opaque, errp); if (ret < 0) { + error_prepend(errp, "Load state of device %s failed: ", + se->idstr); qemu_file_set_error(f, ret); - error_report("Load state of device %s failed", se->idstr); return ret; } } @@ -2968,7 +2970,8 @@ int qemu_loadvm_state(QEMUFile *f) return ret; } - if (qemu_loadvm_state_setup(f) != 0) { + if (qemu_loadvm_state_setup(f, &local_err) != 0) { + error_report_err(local_err); return -EINVAL; } -- cgit 1.4.1 From 92c20b2fc5cd3b423973a65aac945a605f93142e Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:06 +0100 Subject: migration: Introduce ram_bitmaps_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit We will use it in ram_init_bitmaps() to clear the allocated bitmaps when support for error reporting is added to memory_global_dirty_log_start(). Signed-off-by: Cédric Le Goater Reviewed-by: Peter Xu Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240320064911.545001-11-clg@redhat.com Signed-off-by: Peter Xu --- migration/ram.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'migration/ram.c') diff --git a/migration/ram.c b/migration/ram.c index 4cd4f0158c..f0bd71438a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2438,10 +2438,23 @@ static void xbzrle_cleanup(void) XBZRLE_cache_unlock(); } +static void ram_bitmaps_destroy(void) +{ + RAMBlock *block; + + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + g_free(block->clear_bmap); + block->clear_bmap = NULL; + g_free(block->bmap); + block->bmap = NULL; + g_free(block->file_bmap); + block->file_bmap = NULL; + } +} + static void ram_save_cleanup(void *opaque) { RAMState **rsp = opaque; - RAMBlock *block; /* We don't use dirty log with background snapshots */ if (!migrate_background_snapshot()) { @@ -2458,12 +2471,7 @@ static void ram_save_cleanup(void *opaque) } } - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - g_free(block->clear_bmap); - block->clear_bmap = NULL; - g_free(block->bmap); - block->bmap = NULL; - } + ram_bitmaps_destroy(); xbzrle_cleanup(); compress_threads_save_cleanup(); -- cgit 1.4.1 From 639ec3fbf96c15b1568f52a50b9fa727cde3144b Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:07 +0100 Subject: memory: Add Error** argument to the global_dirty_log routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the log_global*() handlers take an Error** parameter and return a bool, do the same for memory_global_dirty_log_start() and memory_global_dirty_log_stop(). 
The error is reported in the callers for now and it will be propagated in the call stack in the next changes. Note a functional change in ram_init_bitmaps(): if the dirty pages logger fails to start, there is no need to synchronize the dirty pages bitmaps. colo_incoming_start_dirty_log() could be modified in a similar way. Cc: Stefano Stabellini Cc: Anthony Perard Cc: Paul Durrant Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: David Hildenbrand Cc: Hyman Huang Signed-off-by: Cédric Le Goater Reviewed-by: Fabiano Rosas Acked-by: Peter Xu Link: https://lore.kernel.org/r/20240320064911.545001-12-clg@redhat.com Signed-off-by: Peter Xu --- hw/i386/xen/xen-hvm.c | 2 +- include/exec/memory.h | 5 ++++- migration/dirtyrate.c | 13 +++++++++++-- migration/ram.c | 23 +++++++++++++++++++++-- system/memory.c | 11 +++++------ 5 files changed, 42 insertions(+), 12 deletions(-) (limited to 'migration/ram.c') diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index f6e9a1bc86..006d219ad5 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -669,7 +669,7 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) void qmp_xen_set_global_dirty_log(bool enable, Error **errp) { if (enable) { - memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp); } else { memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); } diff --git a/include/exec/memory.h b/include/exec/memory.h index 5555567bc4..c129ee6db7 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2570,8 +2570,11 @@ void memory_listener_unregister(MemoryListener *listener); * memory_global_dirty_log_start: begin dirty logging for all regions * * @flags: purpose of starting dirty log, migration or dirty rate + * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. 
*/ -void memory_global_dirty_log_start(unsigned int flags); +bool memory_global_dirty_log_start(unsigned int flags, Error **errp); /** * memory_global_dirty_log_stop: end dirty logging for all regions diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 1d2e85746f..d02d70b7b4 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -90,9 +90,15 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages, void global_dirty_log_change(unsigned int flag, bool start) { + Error *local_err = NULL; + bool ret; + bql_lock(); if (start) { - memory_global_dirty_log_start(flag); + ret = memory_global_dirty_log_start(flag, &local_err); + if (!ret) { + error_report_err(local_err); + } } else { memory_global_dirty_log_stop(flag); } @@ -608,9 +614,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) { int64_t start_time; DirtyPageRecord dirty_pages; + Error *local_err = NULL; bql_lock(); - memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); + if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) { + error_report_err(local_err); + } /* * 1'round of log sync may return all 1 bits with diff --git a/migration/ram.c b/migration/ram.c index f0bd71438a..bade3e9281 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2862,18 +2862,32 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs) static void ram_init_bitmaps(RAMState *rs) { + Error *local_err = NULL; + bool ret = true; + qemu_mutex_lock_ramlist(); WITH_RCU_READ_LOCK_GUARD() { ram_list_init_bitmaps(); /* We don't use dirty log with background snapshots */ if (!migrate_background_snapshot()) { - memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, + &local_err); + if (!ret) { + error_report_err(local_err); + goto out_unlock; + } migration_bitmap_sync_precopy(rs, false); } } +out_unlock: qemu_mutex_unlock_ramlist(); + if (!ret) { + ram_bitmaps_destroy(); + return; + } + /* * 
After an eventual first bitmap sync, fixup the initial bitmap * containing all 1s to exclude any discarded pages from migration. @@ -3665,6 +3679,8 @@ int colo_init_ram_cache(void) void colo_incoming_start_dirty_log(void) { RAMBlock *block = NULL; + Error *local_err = NULL; + /* For memory_global_dirty_log_start below. */ bql_lock(); qemu_mutex_lock_ramlist(); @@ -3676,7 +3692,10 @@ void colo_incoming_start_dirty_log(void) /* Discard this dirty bitmap record */ bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS); } - memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, + &local_err)) { + error_report_err(local_err); + } } ram_state->migration_dirty_pages = 0; qemu_mutex_unlock_ramlist(); diff --git a/system/memory.c b/system/memory.c index 86d6c33180..c02c1d4bed 100644 --- a/system/memory.c +++ b/system/memory.c @@ -2937,10 +2937,9 @@ err: return false; } -void memory_global_dirty_log_start(unsigned int flags) +bool memory_global_dirty_log_start(unsigned int flags, Error **errp) { unsigned int old_flags; - Error *local_err = NULL; assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); @@ -2952,7 +2951,7 @@ void memory_global_dirty_log_start(unsigned int flags) flags &= ~global_dirty_tracking; if (!flags) { - return; + return true; } old_flags = global_dirty_tracking; @@ -2960,17 +2959,17 @@ void memory_global_dirty_log_start(unsigned int flags) trace_global_dirty_changed(global_dirty_tracking); if (!old_flags) { - if (!memory_global_dirty_log_do_start(&local_err)) { + if (!memory_global_dirty_log_do_start(errp)) { global_dirty_tracking &= ~flags; trace_global_dirty_changed(global_dirty_tracking); - error_report_err(local_err); - return; + return false; } memory_region_transaction_begin(); memory_region_update_pending = true; memory_region_transaction_commit(); } + return true; } static void memory_global_dirty_log_do_stop(unsigned int flags) -- cgit 1.4.1 From 
16ecd25a4f324fd98cb974a0fd80390c7e136ea7 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:08 +0100 Subject: migration: Add Error** argument to ram_state_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the return value is not exploited, follow the recommendations of qapi/error.h and change it to a bool Signed-off-by: Cédric Le Goater Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240320064911.545001-13-clg@redhat.com Signed-off-by: Peter Xu --- migration/ram.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'migration/ram.c') diff --git a/migration/ram.c b/migration/ram.c index bade3e9281..26ce11a337 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2780,13 +2780,13 @@ err_out: return -ENOMEM; } -static int ram_state_init(RAMState **rsp) +static bool ram_state_init(RAMState **rsp, Error **errp) { *rsp = g_try_new0(RAMState, 1); if (!*rsp) { - error_report("%s: Init ramstate fail", __func__); - return -1; + error_setg(errp, "%s: Init ramstate fail", __func__); + return false; } qemu_mutex_init(&(*rsp)->bitmap_mutex); @@ -2802,7 +2802,7 @@ static int ram_state_init(RAMState **rsp) (*rsp)->migration_dirty_pages = (*rsp)->ram_bytes_total >> TARGET_PAGE_BITS; ram_state_reset(*rsp); - return 0; + return true; } static void ram_list_init_bitmaps(void) @@ -2897,7 +2897,10 @@ out_unlock: static int ram_init_all(RAMState **rsp) { - if (ram_state_init(rsp)) { + Error *local_err = NULL; + + if (!ram_state_init(rsp, &local_err)) { + error_report_err(local_err); return -1; } @@ -3624,7 +3627,11 @@ void ram_handle_zero(void *host, uint64_t size) static void colo_init_ram_state(void) { - ram_state_init(&ram_state); + Error *local_err = NULL; + + if (!ram_state_init(&ram_state, &local_err)) { + error_report_err(local_err); + } } /* -- cgit 1.4.1 From 7bee8ba8bbcef27cc98bc85747258e942f8d9717 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 
20 Mar 2024 07:49:09 +0100 Subject: migration: Add Error** argument to xbzrle_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the return value (-ENOMEM) is not exploited, follow the recommendations of qapi/error.h and change it to a bool Signed-off-by: Cédric Le Goater Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240320064911.545001-14-clg@redhat.com Signed-off-by: Peter Xu --- migration/ram.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'migration/ram.c') diff --git a/migration/ram.c b/migration/ram.c index 26ce11a337..70797ef5d8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2727,44 +2727,41 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length) * For every allocation, we will try not to crash the VM if the * allocation failed. */ -static int xbzrle_init(void) +static bool xbzrle_init(Error **errp) { - Error *local_err = NULL; - if (!migrate_xbzrle()) { - return 0; + return true; } XBZRLE_cache_lock(); XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE); if (!XBZRLE.zero_target_page) { - error_report("%s: Error allocating zero page", __func__); + error_setg(errp, "%s: Error allocating zero page", __func__); goto err_out; } XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(), - TARGET_PAGE_SIZE, &local_err); + TARGET_PAGE_SIZE, errp); if (!XBZRLE.cache) { - error_report_err(local_err); goto free_zero_page; } XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); if (!XBZRLE.encoded_buf) { - error_report("%s: Error allocating encoded_buf", __func__); + error_setg(errp, "%s: Error allocating encoded_buf", __func__); goto free_cache; } XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); if (!XBZRLE.current_buf) { - error_report("%s: Error allocating current_buf", __func__); + error_setg(errp, "%s: Error allocating current_buf", __func__); goto free_encoded_buf; } /* We are all good */ XBZRLE_cache_unlock(); - return 
0; + return true; free_encoded_buf: g_free(XBZRLE.encoded_buf); @@ -2777,7 +2774,7 @@ free_zero_page: XBZRLE.zero_target_page = NULL; err_out: XBZRLE_cache_unlock(); - return -ENOMEM; + return false; } static bool ram_state_init(RAMState **rsp, Error **errp) @@ -2904,7 +2901,8 @@ static int ram_init_all(RAMState **rsp) return -1; } - if (xbzrle_init()) { + if (!xbzrle_init(&local_err)) { + error_report_err(local_err); ram_state_cleanup(rsp); return -1; } -- cgit 1.4.1 From 030b56b280375242cd8591b06e806978b8564be1 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 20 Mar 2024 07:49:10 +0100 Subject: migration: Modify ram_init_bitmaps() to report dirty tracking errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .save_setup() handler has now an Error** argument that we can use to propagate errors reported by the .log_global_start() handler. Do that for the RAM. The caller qemu_savevm_state_setup() will store the error under the migration stream for later detection in the migration sequence. 
Signed-off-by: Cédric Le Goater Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240320064911.545001-15-clg@redhat.com Signed-off-by: Peter Xu --- migration/ram.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'migration/ram.c') diff --git a/migration/ram.c b/migration/ram.c index 70797ef5d8..daffcd82d4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2857,9 +2857,8 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs) } } -static void ram_init_bitmaps(RAMState *rs) +static bool ram_init_bitmaps(RAMState *rs, Error **errp) { - Error *local_err = NULL; bool ret = true; qemu_mutex_lock_ramlist(); @@ -2868,10 +2867,8 @@ static void ram_init_bitmaps(RAMState *rs) ram_list_init_bitmaps(); /* We don't use dirty log with background snapshots */ if (!migrate_background_snapshot()) { - ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, - &local_err); + ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp); if (!ret) { - error_report_err(local_err); goto out_unlock; } migration_bitmap_sync_precopy(rs, false); @@ -2882,7 +2879,7 @@ out_unlock: if (!ret) { ram_bitmaps_destroy(); - return; + return false; } /* @@ -2890,24 +2887,23 @@ out_unlock: * containing all 1s to exclude any discarded pages from migration. */ migration_bitmap_clear_discarded_pages(rs); + return true; } -static int ram_init_all(RAMState **rsp) +static int ram_init_all(RAMState **rsp, Error **errp) { - Error *local_err = NULL; - - if (!ram_state_init(rsp, &local_err)) { - error_report_err(local_err); + if (!ram_state_init(rsp, errp)) { return -1; } - if (!xbzrle_init(&local_err)) { - error_report_err(local_err); + if (!xbzrle_init(errp)) { ram_state_cleanup(rsp); return -1; } - ram_init_bitmaps(*rsp); + if (!ram_init_bitmaps(*rsp, errp)) { + return -1; + } return 0; } @@ -3104,8 +3100,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp) /* migration has already setup the bitmap, reuse it. 
*/ if (!migration_in_colo_state()) { - if (ram_init_all(rsp) != 0) { - error_setg(errp, "%s: failed to setup RAM for migration", __func__); + if (ram_init_all(rsp, errp) != 0) { compress_threads_save_cleanup(); return -1; } -- cgit 1.4.1 From 5ef7e26bdb7eda10d6d5e1b77121be9945e5e550 Mon Sep 17 00:00:00 2001 From: Yuan Liu Date: Mon, 1 Apr 2024 23:41:10 +0800 Subject: migration/multifd: solve zero page causing multiple page faults Implemented recvbitmap tracking of received pages in multifd. If the zero page appears for the first time in the recvbitmap, this page is not checked and set. If the zero page has already appeared in the recvbitmap, there is no need to check the data but directly set the data to 0, because it is unlikely that the zero page will be migrated multiple times. Signed-off-by: Yuan Liu Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240401154110.2028453-2-yuan1.liu@intel.com [peterx: touch up the comment, as the bitmap is used outside postcopy now] Signed-off-by: Peter Xu --- include/exec/ramblock.h | 2 +- migration/multifd-zero-page.c | 4 +++- migration/multifd-zlib.c | 1 + migration/multifd-zstd.c | 1 + migration/multifd.c | 1 + migration/ram.c | 4 ++++ migration/ram.h | 1 + 7 files changed, 12 insertions(+), 2 deletions(-) (limited to 'migration/ram.c') diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 848915ea5b..7062da380b 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -57,7 +57,7 @@ struct RAMBlock { off_t bitmap_offset; uint64_t pages_offset; - /* bitmap of already received pages in postcopy */ + /* Bitmap of already received pages. Only used on destination side. 
*/ unsigned long *receivedmap; /* diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c index 1ba38be636..e1b8370f88 100644 --- a/migration/multifd-zero-page.c +++ b/migration/multifd-zero-page.c @@ -80,8 +80,10 @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p) { for (int i = 0; i < p->zero_num; i++) { void *page = p->host + p->zero[i]; - if (!buffer_is_zero(page, p->page_size)) { + if (ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i])) { memset(page, 0, p->page_size); + } else { + ramblock_recv_bitmap_set_offset(p->block, p->zero[i]); } } } diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c index 99821cd4d5..737a9645d2 100644 --- a/migration/multifd-zlib.c +++ b/migration/multifd-zlib.c @@ -284,6 +284,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) int flush = Z_NO_FLUSH; unsigned long start = zs->total_out; + ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); if (i == p->normal_num - 1) { flush = Z_SYNC_FLUSH; } diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index 02112255ad..256858df0a 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -278,6 +278,7 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) z->in.pos = 0; for (i = 0; i < p->normal_num; i++) { + ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); z->out.dst = p->host + p->normal[i]; z->out.size = p->page_size; z->out.pos = 0; diff --git a/migration/multifd.c b/migration/multifd.c index 2802afe79d..f317bff077 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -277,6 +277,7 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp) for (int i = 0; i < p->normal_num; i++) { p->iov[i].iov_base = p->host + p->normal[i]; p->iov[i].iov_len = p->page_size; + ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); } return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); } diff --git a/migration/ram.c b/migration/ram.c index daffcd82d4..a975c5af16 100644 --- 
a/migration/ram.c +++ b/migration/ram.c @@ -275,6 +275,10 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, nr); } +void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset) +{ + set_bit_atomic(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); +} #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL) /* diff --git a/migration/ram.h b/migration/ram.h index 08feecaf51..bc0318b834 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -69,6 +69,7 @@ int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset); void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); +void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset); int64_t ramblock_recv_bitmap_send(QEMUFile *file, const char *block_name); bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); -- cgit 1.4.1