diff options
Diffstat (limited to 'migration')
| -rw-r--r-- | migration/channel.c | 9 | ||||
| -rw-r--r-- | migration/cpr.c | 1 | ||||
| -rw-r--r-- | migration/migration.c | 81 | ||||
| -rw-r--r-- | migration/migration.h | 37 | ||||
| -rw-r--r-- | migration/multifd.c | 54 | ||||
| -rw-r--r-- | migration/multifd.h | 2 | ||||
| -rw-r--r-- | migration/options.c | 2 | ||||
| -rw-r--r-- | migration/ram.c | 4 | ||||
| -rw-r--r-- | migration/rdma.c | 2 | ||||
| -rw-r--r-- | migration/savevm.c | 6 | ||||
| -rw-r--r-- | migration/tls.c | 5 | ||||
| -rw-r--r-- | migration/tls.h | 2 | ||||
| -rw-r--r-- | migration/trace-events | 4 |
13 files changed, 160 insertions, 49 deletions
diff --git a/migration/channel.c b/migration/channel.c index f9de064f3b..a547b1fbfe 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -33,6 +33,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) { MigrationState *s = migrate_get_current(); + MigrationIncomingState *mis = migration_incoming_get_current(); Error *local_err = NULL; trace_migration_set_incoming_channel( @@ -47,6 +48,10 @@ void migration_channel_process_incoming(QIOChannel *ioc) if (local_err) { error_report_err(local_err); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + if (mis->exit_on_error) { + exit(EXIT_FAILURE); + } } } @@ -74,7 +79,7 @@ void migration_channel_connect(MigrationState *s, if (!error) { /* tls_channel_connect will call back to this * function after the TLS handshake, - * so we mustn't call migrate_fd_connect until then + * so we mustn't call migration_connect until then */ return; @@ -89,7 +94,7 @@ void migration_channel_connect(MigrationState *s, qemu_mutex_unlock(&s->qemu_file_lock); } } - migrate_fd_connect(s, error); + migration_connect(s, error); error_free(error); } diff --git a/migration/cpr.c b/migration/cpr.c index 584b0b98f7..180faab247 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -137,6 +137,7 @@ int cpr_state_save(MigrationChannel *channel, Error **errp) trace_cpr_state_save(MigMode_str(mode)); if (mode == MIG_MODE_CPR_TRANSFER) { + g_assert(channel); f = cpr_transfer_output(channel, errp); } else { return 0; diff --git a/migration/migration.c b/migration/migration.c index 396928513a..c597aa707e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -106,7 +106,6 @@ static GSList *migration_blockers[MIG_MODE__MAX]; static bool migration_object_check(MigrationState *ms, Error **errp); static bool migration_switchover_start(MigrationState *s, Error **errp); -static void migrate_fd_cancel(MigrationState *s); static bool close_return_path_on_source(MigrationState *s); static void migration_completion_end(MigrationState *s); static void migrate_hup_delete(MigrationState *s); @@ -342,17 +341,6 @@ void migration_bh_schedule(QEMUBHFunc *cb, void *opaque) qemu_bh_schedule(bh); } -void migration_cancel(const Error *error) -{ - if (error) { - migrate_set_error(current_migration, error); - } - if (migrate_dirty_limit()) { - qmp_cancel_vcpu_dirty_limit(false, -1, NULL); - } - migrate_fd_cancel(current_migration); -} - void migration_shutdown(void) { /* @@ -365,7 +353,7 @@ void migration_shutdown(void) * Cancel the current migration - that will (eventually) * stop the migration using this structure */ - migration_cancel(NULL); + migration_cancel(); object_unref(OBJECT(current_migration)); /* @@ -1435,12 +1423,12 @@ static void migration_cleanup_json_writer(MigrationState *s) g_clear_pointer(&s->vmdesc, json_writer_free); } -static void migrate_fd_cleanup(MigrationState *s) +static void migration_cleanup(MigrationState *s) { MigrationEventType type; QEMUFile *tmp = NULL; - trace_migrate_fd_cleanup(); + trace_migration_cleanup(); migration_cleanup_json_writer(s); @@ -1497,9 +1485,9 @@ static void migrate_fd_cleanup(MigrationState *s) yank_unregister_instance(MIGRATION_YANK_INSTANCE); } -static void migrate_fd_cleanup_bh(void *opaque) +static void migration_cleanup_bh(void *opaque) { - migrate_fd_cleanup(opaque); + migration_cleanup(opaque); } void migrate_set_error(MigrationState *s, const Error *error) @@ -1529,7 +1517,7 @@ static void migrate_error_free(MigrationState *s) } } -static void migrate_fd_error(MigrationState *s, const Error *error) +static void migration_connect_set_error(MigrationState *s, const Error *error) { MigrationStatus current = s->state; MigrationStatus next; @@ -1558,12 +1546,17 @@ static void migrate_fd_error(MigrationState *s, const Error *error) migrate_set_error(s, error); } -static void migrate_fd_cancel(MigrationState *s) +void migration_cancel(void) { + MigrationState *s = migrate_get_current(); int old_state ; bool setup = (s->state == MIGRATION_STATUS_SETUP); - trace_migrate_fd_cancel(); + trace_migration_cancel(); + + if (migrate_dirty_limit()) { + qmp_cancel_vcpu_dirty_limit(false, -1, NULL); + } WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { if (s->rp_state.from_dst_file) { @@ -2205,7 +2198,7 @@ void qmp_migrate(const char *uri, bool has_channels, out: if (local_err) { - migrate_fd_error(s, local_err); + migration_connect_set_error(s, local_err); error_propagate(errp, local_err); } } @@ -2250,7 +2243,7 @@ static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, if (!resume_requested) { yank_unregister_instance(MIGRATION_YANK_INSTANCE); } - migrate_fd_error(s, local_err); + migration_connect_set_error(s, local_err); error_propagate(errp, local_err); return; } @@ -2258,7 +2251,18 @@ static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, void qmp_migrate_cancel(Error **errp) { - migration_cancel(NULL); + /* + * After postcopy migration has started, the source machine is not + * recoverable in case of a migration error. This also means the + * cancel command cannot be used as cancel should allow the + * machine to continue operation. + */ + if (migration_in_postcopy()) { + error_setg(errp, "Postcopy migration in progress, cannot cancel."); + return; + } + + migration_cancel(); } void qmp_migrate_continue(MigrationStatus state, Error **errp) @@ -2644,7 +2648,10 @@ static int postcopy_start(MigrationState *ms, Error **errp) if (migrate_postcopy_preempt()) { migration_wait_main_channel(ms); if (postcopy_preempt_establish_channel(ms)) { - migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED); + if (ms->state != MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&ms->state, ms->state, + MIGRATION_STATUS_FAILED); + } error_setg(errp, "%s: Failed to establish preempt channel", __func__); return -1; @@ -2982,7 +2989,9 @@ fail: error_free(local_err); } - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + if (s->state != MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + } } /** @@ -3005,7 +3014,7 @@ static void bg_migration_completion(MigrationState *s) qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage); qemu_fflush(s->to_dst_file); } else if (s->state == MIGRATION_STATUS_CANCELLING) { - goto fail; + return; } if (qemu_file_get_error(s->to_dst_file)) { @@ -3434,7 +3443,7 @@ static void migration_iteration_finish(MigrationState *s) break; } - migration_bh_schedule(migrate_fd_cleanup_bh, s); + migration_bh_schedule(migration_cleanup_bh, s); bql_unlock(); } @@ -3462,7 +3471,7 @@ static void bg_migration_iteration_finish(MigrationState *s) break; } - migration_bh_schedule(migrate_fd_cleanup_bh, s); + migration_bh_schedule(migration_cleanup_bh, s); bql_unlock(); } @@ -3844,7 +3853,7 @@ fail_setup: return NULL; } -void migrate_fd_connect(MigrationState *s, Error *error_in) +void migration_connect(MigrationState *s, Error *error_in) { Error *local_err = NULL; uint64_t rate_limit; @@ -3854,24 +3863,24 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) /* * If there's a previous error, free it and prepare for another one. * Meanwhile if migration completes successfully, there won't have an error - * dumped when calling migrate_fd_cleanup(). + * dumped when calling migration_cleanup(). */ migrate_error_free(s); s->expected_downtime = migrate_downtime_limit(); if (error_in) { - migrate_fd_error(s, error_in); + migration_connect_set_error(s, error_in); if (resume) { /* * Don't do cleanup for resume if channel is invalid, but only dump * the error. We wait for another channel connect from the user. * The error_report still gives HMP user a hint on what failed. - * It's normally done in migrate_fd_cleanup(), but call it here + * It's normally done in migration_cleanup(), but call it here * explicitly. */ error_report_err(error_copy(s->error)); } else { - migrate_fd_cleanup(s); + migration_cleanup(s); } return; } @@ -3949,9 +3958,11 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) fail: migrate_set_error(s, local_err); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + if (s->state != MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + } error_report_err(local_err); - migrate_fd_cleanup(s); + migration_cleanup(s); } static void migration_class_init(ObjectClass *klass, void *data) diff --git a/migration/migration.h b/migration/migration.h index eaebcc2042..4639e2a7e4 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -443,6 +443,39 @@ struct MigrationState { * Default value is false. (since 8.1) */ bool multifd_flush_after_each_section; + + /* + * This variable only makes sense when set on the machine that is + * the destination of a multifd migration with TLS enabled. It + * affects the behavior of the last send->recv iteration with + * regards to termination of the TLS session. + * + * When set: + * + * - the destination QEMU instance can expect to never get a + * GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error + * message: "The TLS connection was non-properly terminated". + * + * When clear: + * + * - the destination QEMU instance can expect to see a + * GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel + * whenever the last recv() call of that channel happens after + * the source QEMU instance has already issued shutdown() on the + * channel. + * + * Commit 637280aeb2 (since 9.1) introduced a side effect that + * causes the destination instance to not be affected by the + * premature termination, while commit 1d457daf86 (since 10.0) + * causes the premature termination condition to be once again + * reachable. + * + * NOTE: Regardless of the state of this option, a premature + * termination of the TLS connection might happen due to error at + * any moment prior to the last send->recv iteration. + */ + bool multifd_clean_tls_termination; + /* * This decides the size of guest memory chunk that will be used * to track dirty bitmap clearing. The size of memory chunk will @@ -484,7 +517,7 @@ bool migration_has_all_channels(void); void migrate_set_error(MigrationState *s, const Error *error); bool migrate_has_error(MigrationState *s); -void migrate_fd_connect(MigrationState *s, Error *error_in); +void migration_connect(MigrationState *s, Error *error_in); int migration_call_notifiers(MigrationState *s, MigrationEventType type, Error **errp); @@ -530,7 +563,7 @@ void migration_make_urgent_request(void); void migration_consume_urgent_request(void); bool migration_rate_limit(void); void migration_bh_schedule(QEMUBHFunc *cb, void *opaque); -void migration_cancel(const Error *error); +void migration_cancel(void); void migration_populate_vfio_info(MigrationInfo *info); void migration_reset_vfio_bytes_transferred(void); diff --git a/migration/multifd.c b/migration/multifd.c index ab73d6d984..215ad0414a 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -444,7 +444,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) * channels have no I/O handler callback registered when reaching * here, because migration thread will wait for all multifd channel * establishments to complete during setup. Since - * migrate_fd_cleanup() will be scheduled in main thread too, all + * migration_cleanup() will be scheduled in main thread too, all * previous callbacks should guarantee to be completed when * reaching here. See multifd_send_state.channels_created and its * usage. In the future, we could replace this with an assert @@ -490,6 +490,36 @@ void multifd_send_shutdown(void) return; } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + + /* thread_created implies the TLS handshake has succeeded */ + if (p->tls_thread_created && p->thread_created) { + Error *local_err = NULL; + /* + * The destination expects the TLS session to always be + * properly terminated. This helps to detect a premature + * termination in the middle of the stream. Note that + * older QEMUs always break the connection on the source + * and the destination always sees + * GNUTLS_E_PREMATURE_TERMINATION. + */ + migration_tls_channel_end(p->c, &local_err); + + /* + * The above can return an error in case the migration has + * already failed. If the migration succeeded, errors are + * not expected but there's no need to kill the source. + */ + if (local_err && !migration_has_failed(migrate_get_current())) { + warn_report( + "multifd_send_%d: Failed to terminate TLS connection: %s", + p->id, error_get_pretty(local_err)); + break; + } + } + } + multifd_send_terminate_threads(); for (i = 0; i < migrate_multifd_channels(); i++) { @@ -1121,6 +1151,7 @@ void multifd_recv_sync_main(void) static void *multifd_recv_thread(void *opaque) { + MigrationState *s = migrate_get_current(); MultiFDRecvParams *p = opaque; Error *local_err = NULL; bool use_packets = multifd_use_packets(); @@ -1129,19 +1160,34 @@ static void *multifd_recv_thread(void *opaque) trace_multifd_recv_thread_start(p->id); rcu_register_thread(); + if (!s->multifd_clean_tls_termination) { + p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF; + } + while (true) { uint32_t flags = 0; bool has_data = false; p->normal_num = 0; if (use_packets) { + struct iovec iov = { + .iov_base = (void *)p->packet, + .iov_len = p->packet_len + }; + if (multifd_recv_should_exit()) { break; } - ret = qio_channel_read_all_eof(p->c, (void *)p->packet, - p->packet_len, &local_err); - if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ + ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL, + p->read_flags, &local_err); + if (!ret) { + /* EOF */ + assert(!local_err); + break; + } + + if (ret == -1) { break; } diff --git a/migration/multifd.h b/migration/multifd.h index bd785b9873..cf408ff721 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -244,6 +244,8 @@ typedef struct { uint32_t zero_num; /* used for de-compression methods */ void *compress_data; + /* Flags for the QIOChannel */ + int read_flags; } MultiFDRecvParams; typedef struct { diff --git a/migration/options.c b/migration/options.c index 4db340b502..bb259d192a 100644 --- a/migration/options.c +++ b/migration/options.c @@ -99,6 +99,8 @@ const Property migration_properties[] = { clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, preempt_pre_7_2, false), + DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState, + multifd_clean_tls_termination, true), /* Migration parameters */ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, diff --git a/migration/ram.c b/migration/ram.c index 6f460fd22d..589b6505eb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4465,8 +4465,10 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, * Abort and indicate a proper reason. */ error_setg(&err, "RAM block '%s' resized during precopy.", rb->idstr); - migration_cancel(err); + migrate_set_error(migrate_get_current(), err); error_free(err); + + migration_cancel(); } switch (ps) { diff --git a/migration/rdma.c b/migration/rdma.c index 855753c671..76fb034923 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4174,7 +4174,7 @@ void rdma_start_outgoing_migration(void *opaque, s->to_dst_file = rdma_new_output(rdma); s->rdma_migration = true; - migrate_fd_connect(s, NULL); + migration_connect(s, NULL); return; return_path_err: qemu_rdma_cleanup(rdma); diff --git a/migration/savevm.c b/migration/savevm.c index bc375db282..4046faf009 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2940,7 +2940,11 @@ int qemu_loadvm_state(QEMUFile *f) /* When reaching here, it must be precopy */ if (ret == 0) { - ret = qemu_file_get_error(f); + if (migrate_has_error(migrate_get_current())) { + ret = -EINVAL; + } else { + ret = qemu_file_get_error(f); + } } /* diff --git a/migration/tls.c b/migration/tls.c index fa03d9136c..5cbf952383 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -156,6 +156,11 @@ void migration_tls_channel_connect(MigrationState *s, NULL); } +void migration_tls_channel_end(QIOChannel *ioc, Error **errp) +{ + qio_channel_tls_bye(QIO_CHANNEL_TLS(ioc), errp); +} + bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) { if (!migrate_tls()) { diff --git a/migration/tls.h b/migration/tls.h index 5797d153cb..58b25e1228 100644 --- a/migration/tls.h +++ b/migration/tls.h @@ -36,7 +36,7 @@ void migration_tls_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname, Error **errp); - +void migration_tls_channel_end(QIOChannel *ioc, Error **errp); /* Whether the QIO channel requires further TLS handshake? */ bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc); diff --git a/migration/trace-events b/migration/trace-events index 12b262f8ee..58c0f07f5b 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -154,9 +154,9 @@ multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostnam # migration.c migrate_set_state(const char *new_state) "new state %s" -migrate_fd_cleanup(void) "" +migration_cleanup(void) "" migrate_error(const char *error_desc) "error=%s" -migrate_fd_cancel(void) "" +migration_cancel(void) "" migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx" migrate_pending_exact(uint64_t size, uint64_t pre, uint64_t post) "exact pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")" migrate_pending_estimate(uint64_t size, uint64_t pre, uint64_t post) "estimate pending size %" PRIu64 " (pre = %" PRIu64 " post=%" PRIu64 ")" |