From 9d70239e56fadaa3571b8a7998a323ced52e8e76 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Mon, 17 Jun 2024 15:57:27 -0300 Subject: migration/multifd: Add direct-io support When multifd is used along with mapped-ram, we can take benefit of a filesystem that supports the O_DIRECT flag and perform direct I/O in the multifd threads. This brings a significant performance improvement because direct-io writes bypass the page cache which would otherwise be thrashed by the multifd data which is unlikely to be needed again in a short period of time. To be able to use a multifd channel opened with O_DIRECT, we must ensure that a certain aligment is used. Filesystems usually require a block-size alignment for direct I/O. The way to achieve this is by enabling the mapped-ram feature, which already aligns its I/O properly (see MAPPED_RAM_FILE_OFFSET_ALIGNMENT at ram.c). By setting O_DIRECT on the multifd channels, all writes to the same file descriptor need to be aligned as well, even the ones that come from outside multifd, such as the QEMUFile I/O from the main migration code. This makes it impossible to use the same file descriptor for the QEMUFile and for the multifd channels. The various flags and metadata written by the main migration code will always be unaligned by virtue of their small size. To workaround this issue, we'll require a second file descriptor to be used exclusively for direct I/O. The second file descriptor can be obtained by QEMU by re-opening the migration file (already possible), or by being provided by the user or management application (support to be added in future patches). Reviewed-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/migration.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'migration/migration.c') diff --git a/migration/migration.c b/migration/migration.c index e1b269624c..e03c80b3aa 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -155,6 +155,16 @@ static bool migration_needs_seekable_channel(void) return migrate_mapped_ram(); } +static bool migration_needs_extra_fds(void) +{ + /* + * When doing direct-io, multifd requires two different, + * non-duplicated file descriptors so we can use one of them for + * unaligned IO. + */ + return migrate_multifd() && migrate_direct_io(); +} + static bool transport_supports_seeking(MigrationAddress *addr) { if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { @@ -164,6 +174,12 @@ static bool transport_supports_seeking(MigrationAddress *addr) return false; } +static bool transport_supports_extra_fds(MigrationAddress *addr) +{ + /* file: works because QEMU can open it multiple times */ + return addr->transport == MIGRATION_ADDRESS_TYPE_FILE; +} + static bool migration_channels_and_transport_compatible(MigrationAddress *addr, Error **errp) @@ -180,6 +196,13 @@ migration_channels_and_transport_compatible(MigrationAddress *addr, return false; } + if (migration_needs_extra_fds() && + !transport_supports_extra_fds(addr)) { + error_setg(errp, + "Migration requires a transport that allows for extra fds (e.g. file)"); + return false; + } + return true; } -- cgit 1.4.1 From 60ce47675d74ddae3f13a32767d097d9fecbda4b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 19 Jun 2024 18:30:37 -0400 Subject: migration: Rename thread debug names The postcopy thread names on dest QEMU are slightly confusing, partly I'll need to blame myself on 36f62f11e4 ("migration: Postcopy preemption preparation on channel creation"). E.g., "fault-fast" reads like a fast version of "fault-default", but it's actually the fast version of "postcopy/listen". Taking this chance, rename all the migration threads with proper rules. Considering we only have 15 chars usable, prefix all threads with "mig/", meanwhile identify src/dst threads properly this time. So now most thread names will look like "mig/DIR/xxx", where DIR will be "src"/"dst", except the bg-snapshot thread which doesn't have a direction. For multifd threads, making them "mig/{src|dst}/{send|recv}_%d". We used to have "live_migration" thread for a very long time, now it's called "mig/src/main". We may hope to have "mig/dst/main" soon but not yet. Reviewed-by: Fabiano Rosas Reviewed-by: Zhijian Li (Fujitsu) Signed-off-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/colo.c | 2 +- migration/migration.c | 6 +++--- migration/multifd.c | 6 +++--- migration/postcopy-ram.c | 4 ++-- migration/savevm.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'migration/migration.c') diff --git a/migration/colo.c b/migration/colo.c index f96c2ee069..6449490221 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -935,7 +935,7 @@ void coroutine_fn colo_incoming_co(void) assert(bql_locked()); assert(migration_incoming_colo_enabled()); - qemu_thread_create(&th, "COLO incoming", colo_process_incoming_thread, + qemu_thread_create(&th, "mig/dst/colo", colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); mis->colo_incoming_co = qemu_coroutine_self(); diff --git a/migration/migration.c b/migration/migration.c index e03c80b3aa..f9b69af62f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2431,7 +2431,7 @@ static int open_return_path_on_source(MigrationState *ms) trace_open_return_path_on_source(); - qemu_thread_create(&ms->rp_state.rp_thread, "return path", + qemu_thread_create(&ms->rp_state.rp_thread, "mig/src/rp-thr", source_return_path_thread, ms, QEMU_THREAD_JOINABLE); ms->rp_state.rp_thread_created = true; @@ -3770,10 +3770,10 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) } if (migrate_background_snapshot()) { - qemu_thread_create(&s->thread, "bg_snapshot", + qemu_thread_create(&s->thread, "mig/snapshot", bg_migration_thread, s, QEMU_THREAD_JOINABLE); } else { - qemu_thread_create(&s->thread, "live_migration", + qemu_thread_create(&s->thread, "mig/src/main", migration_thread, s, QEMU_THREAD_JOINABLE); } s->migration_thread_running = true; diff --git a/migration/multifd.c b/migration/multifd.c index d82885fdbb..0b4cbaddfe 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -1069,7 +1069,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, args->p = p; p->tls_thread_created = true; - qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker", + qemu_thread_create(&p->tls_thread, "mig/src/tls", multifd_tls_handshake_thread, args, QEMU_THREAD_JOINABLE); return true; @@ -1190,7 +1190,7 @@ bool multifd_send_setup(void) p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); p->packet->version = cpu_to_be32(MULTIFD_VERSION); } - p->name = g_strdup_printf("multifdsend_%d", i); + p->name = g_strdup_printf("mig/src/send_%d", i); p->page_size = qemu_target_page_size(); p->page_count = page_count; p->write_flags = 0; @@ -1604,7 +1604,7 @@ int multifd_recv_setup(Error **errp) + sizeof(uint64_t) * page_count; p->packet = g_malloc0(p->packet_len); } - p->name = g_strdup_printf("multifdrecv_%d", i); + p->name = g_strdup_printf("mig/dst/recv_%d", i); p->normal = g_new0(ram_addr_t, page_count); p->zero = g_new0(ram_addr_t, page_count); p->page_count = page_count; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 3419779548..97701e6bb2 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -1238,7 +1238,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) return -1; } - postcopy_thread_create(mis, &mis->fault_thread, "fault-default", + postcopy_thread_create(mis, &mis->fault_thread, "mig/dst/fault", postcopy_ram_fault_thread, QEMU_THREAD_JOINABLE); mis->have_fault_thread = true; @@ -1258,7 +1258,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) * This thread needs to be created after the temp pages because * it'll fetch RAM_CHANNEL_POSTCOPY PostcopyTmpPage immediately. */ - postcopy_thread_create(mis, &mis->postcopy_prio_thread, "fault-fast", + postcopy_thread_create(mis, &mis->postcopy_prio_thread, "mig/dst/preempt", postcopy_preempt_thread, QEMU_THREAD_JOINABLE); mis->preempt_thread_status = PREEMPT_THREAD_CREATED; } diff --git a/migration/savevm.c b/migration/savevm.c index c621f2359b..e71410d8c1 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2129,7 +2129,7 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) } mis->have_listen_thread = true; - postcopy_thread_create(mis, &mis->listen_thread, "postcopy/listen", + postcopy_thread_create(mis, &mis->listen_thread, "mig/dst/listen", postcopy_ram_listen_thread, QEMU_THREAD_DETACHED); trace_loadvm_postcopy_handle_listen("return"); -- cgit 1.4.1 From a5c24e13e9f176901058b460e61425756322f3e8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 19 Jun 2024 18:30:38 -0400 Subject: migration: Use MigrationStatus instead of int QEMU uses "int" in most cases even if it stores MigrationStatus. I don't know why, so let's try to do that right and see what blows up.. Reviewed-by: Fabiano Rosas Signed-off-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/migration.c | 24 +++++++----------------- migration/migration.h | 9 +++++---- 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'migration/migration.c') diff --git a/migration/migration.c b/migration/migration.c index f9b69af62f..795b30f0d0 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -413,7 +413,7 @@ void migration_incoming_state_destroy(void) yank_unregister_instance(MIGRATION_YANK_INSTANCE); } -static void migrate_generate_event(int new_state) +static void migrate_generate_event(MigrationStatus new_state) { if (migrate_events()) { qapi_event_send_migration(new_state); @@ -1296,8 +1296,6 @@ static void fill_destination_migration_info(MigrationInfo *info) } switch (mis->state) { - case MIGRATION_STATUS_NONE: - return; case MIGRATION_STATUS_SETUP: case MIGRATION_STATUS_CANCELLING: case MIGRATION_STATUS_CANCELLED: @@ -1313,6 +1311,8 @@ static void fill_destination_migration_info(MigrationInfo *info) info->has_status = true; fill_destination_postcopy_migration_info(info); break; + default: + return; } info->status = mis->state; @@ -1360,7 +1360,8 @@ void qmp_migrate_start_postcopy(Error **errp) /* shared migration helpers */ -void migrate_set_state(int *state, int old_state, int new_state) +void migrate_set_state(MigrationStatus *state, MigrationStatus old_state, + MigrationStatus new_state) { assert(new_state < MIGRATION_STATUS__MAX); if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { @@ -1567,7 +1568,7 @@ bool migration_in_postcopy(void) } } -bool migration_postcopy_is_alive(int state) +bool migration_postcopy_is_alive(MigrationStatus state) { switch (state) { case MIGRATION_STATUS_POSTCOPY_ACTIVE: @@ -1612,20 +1613,9 @@ bool migration_is_idle(void) case MIGRATION_STATUS_COMPLETED: case MIGRATION_STATUS_FAILED: return true; - case MIGRATION_STATUS_SETUP: - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_ACTIVE: - case MIGRATION_STATUS_POSTCOPY_ACTIVE: - case MIGRATION_STATUS_COLO: - case MIGRATION_STATUS_PRE_SWITCHOVER: - case MIGRATION_STATUS_DEVICE: - case MIGRATION_STATUS_WAIT_UNPLUG: + default: return false; - case MIGRATION_STATUS__MAX: - g_assert_not_reached(); } - - return false; } bool migration_is_active(void) diff --git a/migration/migration.h b/migration/migration.h index 6af01362d4..38aa1402d5 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -160,7 +160,7 @@ struct MigrationIncomingState { /* PostCopyFD's for external userfaultfds & handlers of shared memory */ GArray *postcopy_remote_fds; - int state; + MigrationStatus state; /* * The incoming migration coroutine, non-NULL during qemu_loadvm_state(). @@ -301,7 +301,7 @@ struct MigrationState { /* params from 'migrate-set-parameters' */ MigrationParameters parameters; - int state; + MigrationStatus state; /* State related to return path */ struct { @@ -459,7 +459,8 @@ struct MigrationState { bool rdma_migration; }; -void migrate_set_state(int *state, int old_state, int new_state); +void migrate_set_state(MigrationStatus *state, MigrationStatus old_state, + MigrationStatus new_state); void migration_fd_process_incoming(QEMUFile *f); void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp); @@ -479,7 +480,7 @@ int migrate_init(MigrationState *s, Error **errp); bool migration_is_blocked(Error **errp); /* True if outgoing migration has entered postcopy phase */ bool migration_in_postcopy(void); -bool migration_postcopy_is_alive(int state); +bool migration_postcopy_is_alive(MigrationStatus state); MigrationState *migrate_get_current(void); bool migration_has_failed(MigrationState *); bool migrate_mode_is_cpr(MigrationState *); -- cgit 1.4.1 From 4dd5f7b8d568116b3ce594b0055a47c6db50f49c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 19 Jun 2024 18:30:39 -0400 Subject: migration: Cleanup incoming migration setup state change Destination QEMU can setup incoming ports for two purposes: either a fresh new incoming migration, in which QEMU will switch to SETUP for channel establishment, or a paused postcopy migration, in which QEMU will stay in POSTCOPY_PAUSED until kicking off the RECOVER phase. Now the state machine worked on dest node for the latter, only because migrate_set_state() implicitly will become a noop if the current state check failed. It wasn't clear at all. Clean it up by providing a helper migration_incoming_state_setup() doing proper checks over current status. Postcopy-paused will be explicitly checked now, and then we can bail out for unknown states. Reviewed-by: Fabiano Rosas Signed-off-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/migration.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'migration/migration.c') diff --git a/migration/migration.c b/migration/migration.c index 795b30f0d0..41a88fc50a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -618,6 +618,29 @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel, return true; } +static bool +migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp) +{ + MigrationStatus current = mis->state; + + if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) { + /* + * Incoming postcopy migration will stay in PAUSED state even if + * reconnection happened. + */ + return true; + } + + if (current != MIGRATION_STATUS_NONE) { + error_setg(errp, "Illegal migration incoming state: %s", + MigrationStatus_str(current)); + return false; + } + + migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP); + return true; +} + static void qemu_start_incoming_migration(const char *uri, bool has_channels, MigrationChannelList *channels, Error **errp) @@ -656,8 +679,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, return; } - migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, - MIGRATION_STATUS_SETUP); + if (!migration_incoming_state_setup(mis, errp)) { + return; + } if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { SocketAddress *saddr = &addr->u.socket; -- cgit 1.4.1 From 4146b77ec7640d3c30d42558e13423594b114385 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 19 Jun 2024 18:30:40 -0400 Subject: migration/postcopy: Add postcopy-recover-setup phase This patch adds a migration state on src called "postcopy-recover-setup". The new state will describe the intermediate step starting from when the src QEMU received a postcopy recovery request, until the migration channels are properly established, but before the recovery process take place. The request came from Libvirt where Libvirt currently rely on the migration state events to detect migration state changes. That works for most of the migration process but except postcopy recovery failures at the beginning. Currently postcopy recovery only has two major states: - postcopy-paused: this is the state that both sides of QEMU will be in for a long time as long as the migration channel was interrupted. - postcopy-recover: this is the state where both sides of QEMU handshake with each other, preparing for a continuation of postcopy which used to be interrupted. The issue here is when the recovery port is invalid, the src QEMU will take the URI/channels, noticing the ports are not valid, and it'll silently keep in the postcopy-paused state, with no event sent to Libvirt. In this case, the only thing Libvirt can do is to poll the migration status with a proper interval, however that's less optimal. Considering that this is the only case where Libvirt won't get a notification from QEMU on such events, let's add postcopy-recover-setup state to mimic what we have with the "setup" state of a newly initialized migration, describing the phase of connection establishment. With that, postcopy recovery will have two paths to go now, and either path will guarantee an event generated. Now the events will look like this during a recovery process on src QEMU: - Initially when the recovery is initiated on src, QEMU will go from "postcopy-paused" -> "postcopy-recover-setup". Old QEMUs don't have this event. - Depending on whether the channel re-establishment is succeeded: - In succeeded case, src QEMU will move from "postcopy-recover-setup" to "postcopy-recover". Old QEMUs also have this event. - In failure case, src QEMU will move from "postcopy-recover-setup" to "postcopy-paused" again. Old QEMUs don't have this event. This guarantees that Libvirt will always receive a notification for recovery process properly. One thing to mention is, such new status is only needed on src QEMU not both. On dest QEMU, the state machine doesn't change. Hence the events don't change either. It's done like so because dest QEMU may not have an explicit point of setup start. E.g., it can happen that when dest QEMUs doesn't use migrate-recover command to use a new URI/channel, but the old URI/channels can be reused in recovery, in which case the old ports simply can work again after the network routes are fixed up. Add a new helper postcopy_is_paused() detecting whether postcopy is still paused, taking RECOVER_SETUP into account too. When using it on both src/dst, a slight change is done altogether to always wait for the semaphore before checking the status, because for both sides a sem_post() will be required for a recovery. Cc: Jiri Denemark Cc: Prasad Pandit Reviewed-by: Fabiano Rosas Buglink: https://issues.redhat.com/browse/RHEL-38485 Signed-off-by: Peter Xu Signed-off-by: Fabiano Rosas --- migration/migration.c | 40 ++++++++++++++++++++++++++++++++++------ migration/postcopy-ram.c | 6 ++++++ migration/postcopy-ram.h | 3 +++ migration/savevm.c | 4 ++-- qapi/migration.json | 4 ++++ 5 files changed, 49 insertions(+), 8 deletions(-) (limited to 'migration/migration.c') diff --git a/migration/migration.c b/migration/migration.c index 41a88fc50a..3dea06d577 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1117,6 +1117,7 @@ bool migration_is_setup_or_active(void) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: case MIGRATION_STATUS_SETUP: case MIGRATION_STATUS_PRE_SWITCHOVER: @@ -1139,6 +1140,7 @@ bool migration_is_running(void) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: case MIGRATION_STATUS_SETUP: case MIGRATION_STATUS_PRE_SWITCHOVER: @@ -1276,6 +1278,7 @@ static void fill_source_migration_info(MigrationInfo *info) case MIGRATION_STATUS_PRE_SWITCHOVER: case MIGRATION_STATUS_DEVICE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: /* TODO add some postcopy stats */ populate_time_info(info, s); @@ -1482,9 +1485,30 @@ static void migrate_error_free(MigrationState *s) static void migrate_fd_error(MigrationState *s, const Error *error) { + MigrationStatus current = s->state; + MigrationStatus next; + assert(s->to_dst_file == NULL); - migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, - MIGRATION_STATUS_FAILED); + + switch (current) { + case MIGRATION_STATUS_SETUP: + next = MIGRATION_STATUS_FAILED; + break; + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: + /* Never fail a postcopy migration; switch back to PAUSED instead */ + next = MIGRATION_STATUS_POSTCOPY_PAUSED; + break; + default: + /* + * This really shouldn't happen. Just be careful to not crash a VM + * just for this. Instead, dump something. + */ + error_report("%s: Illegal migration status (%s) detected", + __func__, MigrationStatus_str(current)); + return; + } + + migrate_set_state(&s->state, current, next); migrate_set_error(s, error); } @@ -1585,6 +1609,7 @@ bool migration_in_postcopy(void) switch (s->state) { case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: return true; default: @@ -1972,6 +1997,9 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp) return false; } + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP); + /* This is a resume, skip init status */ return true; } @@ -3004,9 +3032,9 @@ static MigThrError postcopy_pause(MigrationState *s) * We wait until things fixed up. Then someone will setup the * status back for us. */ - while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + do { qemu_sem_wait(&s->postcopy_pause_sem); - } + } while (postcopy_is_paused(s->state)); if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { /* Woken up by a recover procedure. Give it a shot */ @@ -3702,7 +3730,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) { Error *local_err = NULL; uint64_t rate_limit; - bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; + bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP); int ret; /* @@ -3769,7 +3797,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) if (resume) { /* Wakeup the main migration thread to do the recovery */ - migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP, MIGRATION_STATUS_POSTCOPY_RECOVER); qemu_sem_post(&s->postcopy_pause_sem); return; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 97701e6bb2..1c374b7ea1 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -1770,3 +1770,9 @@ void *postcopy_preempt_thread(void *opaque) return NULL; } + +bool postcopy_is_paused(MigrationStatus status) +{ + return status == MIGRATION_STATUS_POSTCOPY_PAUSED || + status == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP; +} diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index ecae941211..a6df1b2811 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -13,6 +13,8 @@ #ifndef QEMU_POSTCOPY_RAM_H #define QEMU_POSTCOPY_RAM_H +#include "qapi/qapi-types-migration.h" + /* Return true if the host supports everything we need to do postcopy-ram */ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp); @@ -193,5 +195,6 @@ enum PostcopyChannels { void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); void postcopy_preempt_setup(MigrationState *s); int postcopy_preempt_establish_channel(MigrationState *s); +bool postcopy_is_paused(MigrationStatus status); #endif diff --git a/migration/savevm.c b/migration/savevm.c index e71410d8c1..deb57833f8 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2864,9 +2864,9 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) error_report("Detected IO failure for postcopy. " "Migration paused."); - while (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { + do { qemu_sem_wait(&mis->postcopy_pause_sem_dst); - } + } while (postcopy_is_paused(mis->state)); trace_postcopy_pause_incoming_continued(); diff --git a/qapi/migration.json b/qapi/migration.json index de6c8b0444..0f24206bce 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -142,6 +142,9 @@ # # @postcopy-paused: during postcopy but paused. (since 3.0) # +# @postcopy-recover-setup: setup phase for a postcopy recovery process, +# preparing for a recovery phase to start. (since 9.1) +# # @postcopy-recover: trying to recover from a paused postcopy. (since # 3.0) # @@ -166,6 +169,7 @@ { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', 'active', 'postcopy-active', 'postcopy-paused', + 'postcopy-recover-setup', 'postcopy-recover', 'completed', 'failed', 'colo', 'pre-switchover', 'device', 'wait-unplug' ] } ## -- cgit 1.4.1