From 4e1871c450a14e38b09d4e312922eefd475c1c64 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 4 Mar 2024 12:53:37 +0200 Subject: migration: Don't serialize devices in qemu_savevm_state_iterate() Commit 90697be8896c ("live migration: Serialize vmstate saving in stage 2") introduced device serialization in qemu_savevm_state_iterate(). The rationale behind it was to first complete migration of slower-changing block devices and only then migrate the RAM, to avoid sending fast-changing RAM pages over and over. That commit was added a long time ago, and while the serialization was useful back then, this is no longer the case: 1. Block migration is deprecated, see commit 66db46ca83b8 ("migration: Deprecate block migration"). 2. Today there are other iterative devices besides RAM and block, such as VFIO, which are registered for migration after RAM. With the current serialization behavior, a fast-changing device can block other devices from sending their data, which may prevent migration from converging in some cases. The issue described in item 2 was observed in several VFIO migration scenarios with switchover-ack capability enabled, where some workload on the VM prevented RAM from ever reaching a hard zero, thus blocking VFIO initial pre-copy data from being sent. Hence, the destination could not ack switchover and migration could not converge. Fix that by not serializing iterative devices in qemu_savevm_state_iterate(). Note that this still doesn't fully prevent device starvation. As correctly pointed out by Peter [1], a fast-changing device might constantly consume all allocated bandwidth and block the following devices. However, this scenario is more likely to happen only if max-bandwidth is low. 
[1] https://lore.kernel.org/qemu-devel/Zd6iw9dBhW6wKNxx@x1n/ Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240304105339.20713-2-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/savevm.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'migration/savevm.c') diff --git a/migration/savevm.c b/migration/savevm.c index dc1fb9c0d3..e84b26e1c8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1390,7 +1390,8 @@ int qemu_savevm_state_resume_prepare(MigrationState *s) int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) { SaveStateEntry *se; - int ret = 1; + bool all_finished = true; + int ret; trace_savevm_state_iterate(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { @@ -1431,16 +1432,12 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) "%d(%s): %d", se->section_id, se->idstr, ret); qemu_file_set_error(f, ret); - } - if (ret <= 0) { - /* Do not proceed to the next vmstate before this one reported - completion of the current stage. This serializes the migration - and reduces the probability that a faster changing state is - synchronized over and over again. */ - break; + return ret; + } else if (!ret) { + all_finished = false; } } - return ret; + return all_finished; } static bool should_send_vmdesc(void) -- cgit 1.4.1 From e6e08e83239a067449b9698874c7547164a38414 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 4 Mar 2024 13:28:27 +0100 Subject: migration: Do not call PRECOPY_NOTIFY_SETUP notifiers in case of error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When commit bd2270608fa0 ("migration/ram.c: add a notifier chain for precopy") added PRECOPY_NOTIFY_SETUP notifiers at the end of qemu_savevm_state_setup(), it didn't take into account a possible error in the loop calling vmstate_save() or .save_setup() handlers. Check ret value before calling the notifiers. 
Reviewed-by: Peter Xu Signed-off-by: Cédric Le Goater Link: https://lore.kernel.org/r/20240304122844.1888308-10-clg@redhat.com Signed-off-by: Peter Xu --- migration/savevm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'migration/savevm.c') diff --git a/migration/savevm.c b/migration/savevm.c index e84b26e1c8..76b57a9888 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1317,7 +1317,7 @@ void qemu_savevm_state_setup(QEMUFile *f) MigrationState *ms = migrate_get_current(); SaveStateEntry *se; Error *local_err = NULL; - int ret; + int ret = 0; json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size()); json_writer_start_array(ms->vmdesc, "devices"); @@ -1351,6 +1351,10 @@ void qemu_savevm_state_setup(QEMUFile *f) } } + if (ret) { + return; + } + if (precopy_notify(PRECOPY_NOTIFY_SETUP, &local_err)) { error_report_err(local_err); } -- cgit 1.4.1 From aeaafb1e59f81f5cc715e656dac23f3fe5db3faa Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Mon, 11 Mar 2024 10:48:51 -0700 Subject: migration: export migration_is_running Delete the MigrationState parameter from migration_is_running and move it to the public API in misc.h. 
Signed-off-by: Steve Sistare Link: https://lore.kernel.org/r/1710179338-294359-5-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- include/migration/misc.h | 1 + migration/migration.c | 10 ++++++---- migration/migration.h | 2 -- migration/options.c | 4 ++-- migration/savevm.c | 2 +- system/dirtylimit.c | 2 +- target/riscv/kvm/kvm-cpu.c | 4 ++-- 7 files changed, 13 insertions(+), 12 deletions(-) (limited to 'migration/savevm.c') diff --git a/include/migration/misc.h b/include/migration/misc.h index e1f1bf853e..7526977de6 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -106,6 +106,7 @@ int migration_call_notifiers(MigrationState *s, MigrationEventType type, bool migration_in_setup(MigrationState *); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); +bool migration_is_running(void); /* ...and after the device transmission */ /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ bool migration_in_incoming_postcopy(void); diff --git a/migration/migration.c b/migration/migration.c index 17859cbaee..546ba86c63 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1103,9 +1103,11 @@ bool migration_is_setup_or_active(void) } } -bool migration_is_running(int state) +bool migration_is_running(void) { - switch (state) { + MigrationState *s = current_migration; + + switch (s->state) { case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: @@ -1477,7 +1479,7 @@ static void migrate_fd_cancel(MigrationState *s) do { old_state = s->state; - if (!migration_is_running(old_state)) { + if (!migration_is_running()) { break; } /* If the migration is paused, kick it out of the pause */ @@ -1962,7 +1964,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return true; } - if (migration_is_running(s->state)) { + if (migration_is_running()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return false; } diff 
--git a/migration/migration.h b/migration/migration.h index 736460aa8b..e4983db9c9 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -479,8 +479,6 @@ bool migrate_has_error(MigrationState *s); void migrate_fd_connect(MigrationState *s, Error *error_in); -bool migration_is_running(int state); - int migrate_init(MigrationState *s, Error **errp); bool migration_is_blocked(Error **errp); /* True if outgoing migration has entered postcopy phase */ diff --git a/migration/options.c b/migration/options.c index 40eb930940..642cfb00a3 100644 --- a/migration/options.c +++ b/migration/options.c @@ -681,7 +681,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp) MigrationState *s = migrate_get_current(); bool new_caps[MIGRATION_CAPABILITY__MAX]; - if (migration_is_running(s->state)) { + if (migration_is_running()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return false; } @@ -725,7 +725,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, MigrationCapabilityStatusList *cap; bool new_caps[MIGRATION_CAPABILITY__MAX]; - if (migration_is_running(s->state) || migration_in_colo_state()) { + if (migration_is_running() || migration_in_colo_state()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } diff --git a/migration/savevm.c b/migration/savevm.c index 76b57a9888..388d7af7cd 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1706,7 +1706,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) MigrationState *ms = migrate_get_current(); MigrationStatus status; - if (migration_is_running(ms->state)) { + if (migration_is_running()) { error_setg(errp, QERR_MIGRATION_ACTIVE); return -EINVAL; } diff --git a/system/dirtylimit.c b/system/dirtylimit.c index 051e0311c1..1622bb7426 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -451,7 +451,7 @@ static bool dirtylimit_is_allowed(void) { MigrationState *ms = migrate_get_current(); - if (migration_is_running(ms->state) && + if (migration_is_running() && 
(!qemu_thread_is_self(&ms->thread)) && migrate_dirty_limit() && dirtylimit_in_service()) { diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index c7afdb1e81..cda7d78a77 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -44,7 +44,7 @@ #include "kvm_riscv.h" #include "sbi_ecall_interface.h" #include "chardev/char-fe.h" -#include "migration/migration.h" +#include "migration/misc.h" #include "sysemu/runstate.h" #include "hw/riscv/numa.h" @@ -729,7 +729,7 @@ static void kvm_riscv_put_regs_timer(CPUState *cs) * frequency. Therefore, we should check whether they are the same here * during the migration. */ - if (migration_is_running(migrate_get_current()->state)) { + if (migration_is_running()) { KVM_RISCV_GET_TIMER(cs, frequency, reg); if (reg != env->kvm_timer_frequency) { error_report("Dst Hosts timer frequency != Src Hosts"); -- cgit 1.4.1