diff options
Diffstat (limited to 'migration/migration.c')
| -rw-r--r-- | migration/migration.c | 154 |
1 files changed, 62 insertions, 92 deletions
diff --git a/migration/migration.c b/migration/migration.c index d61e572742..585d3c8f55 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -20,6 +20,7 @@ #include "migration/blocker.h" #include "exec.h" #include "fd.h" +#include "file.h" #include "socket.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" @@ -98,6 +99,7 @@ static int migration_maybe_pause(MigrationState *s, int *current_active_state, int new_state); static void migrate_fd_cancel(MigrationState *s); +static int await_return_path_close_on_source(MigrationState *s); static bool migration_needs_multiple_sockets(void) { @@ -153,6 +155,7 @@ void migration_object_init(void) qemu_sem_init(¤t_incoming->postcopy_qemufile_dst_done, 0); qemu_mutex_init(¤t_incoming->page_request_mutex); + qemu_cond_init(¤t_incoming->page_request_cond); current_incoming->page_requested = g_tree_new(page_request_addr_cmp); migration_object_check(current_migration, &error_fatal); @@ -367,7 +370,7 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, * things like g_tree_lookup() will return TRUE (1) when found. */ g_tree_insert(mis->page_requested, aligned, (gpointer)1); - mis->page_requested_count++; + qatomic_inc(&mis->page_requested_count); trace_postcopy_page_req_add(aligned, mis->page_requested_count); } } @@ -447,6 +450,8 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) exec_start_incoming_migration(p, errp); } else if (strstart(uri, "fd:", &p)) { fd_start_incoming_migration(p, errp); + } else if (strstart(uri, "file:", &p)) { + file_start_incoming_migration(p, errp); } else { error_setg(errp, "unknown migration protocol: %s", uri); } @@ -1177,11 +1182,11 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_fclose(tmp); } - if (s->postcopy_qemufile_src) { - migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src); - qemu_fclose(s->postcopy_qemufile_src); - s->postcopy_qemufile_src = NULL; - } + /* + * We already cleaned up to_dst_file, so errors from the return + * path might be due to that, ignore them. + */ + await_return_path_close_on_source(s); assert(!migration_is_active(s)); @@ -1245,7 +1250,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error) static void migrate_fd_cancel(MigrationState *s) { int old_state ; - QEMUFile *f = migrate_get_current()->to_dst_file; + trace_migrate_fd_cancel(); WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { @@ -1271,11 +1276,13 @@ static void migrate_fd_cancel(MigrationState *s) * If we're unlucky the migration code might be stuck somewhere in a * send/write while the network has failed and is waiting to timeout; * if we've got shutdown(2) available then we can force it to quit. - * The outgoing qemu file gets closed in migrate_fd_cleanup that is - * called in a bh, so there is no race against this cancel. */ - if (s->state == MIGRATION_STATUS_CANCELLING && f) { - qemu_file_shutdown(f); + if (s->state == MIGRATION_STATUS_CANCELLING) { + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->to_dst_file) { + qemu_file_shutdown(s->to_dst_file); + } + } } if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { Error *local_err = NULL; @@ -1535,12 +1542,14 @@ void qmp_migrate_pause(Error **errp) { MigrationState *ms = migrate_get_current(); MigrationIncomingState *mis = migration_incoming_get_current(); - int ret; + int ret = 0; if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { /* Source side, during postcopy */ qemu_mutex_lock(&ms->qemu_file_lock); - ret = qemu_file_shutdown(ms->to_dst_file); + if (ms->to_dst_file) { + ret = qemu_file_shutdown(ms->to_dst_file); + } qemu_mutex_unlock(&ms->qemu_file_lock); if (ret) { error_setg(errp, "Failed to pause source migration"); @@ -1696,16 +1705,14 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, exec_start_outgoing_migration(s, p, &local_err); } else if (strstart(uri, "fd:", &p)) { fd_start_outgoing_migration(s, p, &local_err); + } else if (strstart(uri, "file:", &p)) { + file_start_outgoing_migration(s, p, &local_err); } else { - if (!resume_requested) { - yank_unregister_instance(MIGRATION_YANK_INSTANCE); - } error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); block_cleanup_parameters(); - return; } if (local_err) { @@ -1788,18 +1795,6 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, } } -/* Return true to retry, false to quit */ -static bool postcopy_pause_return_path_thread(MigrationState *s) -{ - trace_postcopy_pause_return_path(); - - qemu_sem_wait(&s->postcopy_pause_rp_sem); - - trace_postcopy_pause_return_path_continued(); - - return true; -} - static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name) { RAMBlock *block = qemu_ram_block_by_name(block_name); @@ -1883,7 +1878,6 @@ static void *source_return_path_thread(void *opaque) trace_source_return_path_thread_entry(); rcu_register_thread(); -retry: while (!ms->rp_state.error && !qemu_file_get_error(rp) && migration_is_setup_or_active(ms->state)) { trace_source_return_path_thread_loop_top(); @@ -2005,38 +1999,17 @@ retry: } out: - res = qemu_file_get_error(rp); - if (res) { - if (res && migration_in_postcopy()) { - /* - * Maybe there is something we can do: it looks like a - * network down issue, and we pause for a recovery. - */ - migration_release_dst_files(ms); - rp = NULL; - if (postcopy_pause_return_path_thread(ms)) { - /* - * Reload rp, reset the rest. Referencing it is safe since - * it's reset only by us above, or when migration completes - */ - rp = ms->rp_state.from_dst_file; - ms->rp_state.error = false; - goto retry; - } - } - + if (qemu_file_get_error(rp)) { trace_source_return_path_thread_bad_end(); mark_source_rp_bad(ms); } trace_source_return_path_thread_end(); - migration_release_dst_files(ms); rcu_unregister_thread(); return NULL; } -static int open_return_path_on_source(MigrationState *ms, - bool create_thread) +static int open_return_path_on_source(MigrationState *ms) { ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); if (!ms->rp_state.from_dst_file) { @@ -2045,11 +2018,6 @@ static int open_return_path_on_source(MigrationState *ms, trace_open_return_path_on_source(); - if (!create_thread) { - /* We're done */ - return 0; - } - qemu_thread_create(&ms->rp_state.rp_thread, "return path", source_return_path_thread, ms, QEMU_THREAD_JOINABLE); ms->rp_state.rp_thread_created = true; @@ -2062,24 +2030,39 @@ static int open_return_path_on_source(MigrationState *ms, /* Returns 0 if the RP was ok, otherwise there was an error on the RP */ static int await_return_path_close_on_source(MigrationState *ms) { + int ret; + + if (!ms->rp_state.rp_thread_created) { + return 0; + } + + trace_migration_return_path_end_before(); + /* - * If this is a normal exit then the destination will send a SHUT and the - * rp_thread will exit, however if there's an error we need to cause - * it to exit. + * If this is a normal exit then the destination will send a SHUT + * and the rp_thread will exit, however if there's an error we + * need to cause it to exit. shutdown(2), if we have it, will + * cause it to unblock if it's stuck waiting for the destination. */ - if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) { - /* - * shutdown(2), if we have it, will cause it to unblock if it's stuck - * waiting for the destination. - */ - qemu_file_shutdown(ms->rp_state.from_dst_file); - mark_source_rp_bad(ms); + WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { + if (ms->to_dst_file && ms->rp_state.from_dst_file && + qemu_file_get_error(ms->to_dst_file)) { + qemu_file_shutdown(ms->rp_state.from_dst_file); + } } + trace_await_return_path_close_on_source_joining(); qemu_thread_join(&ms->rp_state.rp_thread); ms->rp_state.rp_thread_created = false; trace_await_return_path_close_on_source_close(); - return ms->rp_state.error; + + ret = ms->rp_state.error; + ms->rp_state.error = false; + + migration_release_dst_files(ms); + + trace_migration_return_path_end_after(ret); + return ret; } static inline void @@ -2375,20 +2358,8 @@ static void migration_completion(MigrationState *s) goto fail; } - /* - * If rp was opened we must clean up the thread before - * cleaning everything else up (since if there are no failures - * it will wait for the destination to send it's status in - * a SHUT command). - */ - if (s->rp_state.rp_thread_created) { - int rp_error; - trace_migration_return_path_end_before(); - rp_error = await_return_path_close_on_source(s); - trace_migration_return_path_end_after(rp_error); - if (rp_error) { - goto fail; - } + if (await_return_path_close_on_source(s)) { + goto fail; } if (qemu_file_get_error(s->to_dst_file)) { @@ -2565,6 +2536,13 @@ static MigThrError postcopy_pause(MigrationState *s) qemu_file_shutdown(file); qemu_fclose(file); + /* + * We're already pausing, so ignore any errors on the return + * path and just wait for the thread to finish. It will be + * re-created when we resume. + */ + await_return_path_close_on_source(s); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_POSTCOPY_PAUSED); @@ -2582,12 +2560,6 @@ static MigThrError postcopy_pause(MigrationState *s) if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) { /* Woken up by a recover procedure. Give it a shot */ - /* - * Firstly, let's wake up the return path now, with a new - * return path channel. - */ - qemu_sem_post(&s->postcopy_pause_rp_sem); - /* Do the resume logic */ if (postcopy_do_resume(s) == 0) { /* Let's continue! */ @@ -3277,7 +3249,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) * QEMU uses the return path. */ if (migrate_postcopy_ram() || migrate_return_path()) { - if (open_return_path_on_source(s, !resume)) { + if (open_return_path_on_source(s)) { error_setg(&local_err, "Unable to open return-path for postcopy"); migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); migrate_set_error(s, local_err); @@ -3341,7 +3313,6 @@ static void migration_instance_finalize(Object *obj) qemu_sem_destroy(&ms->rate_limit_sem); qemu_sem_destroy(&ms->pause_sem); qemu_sem_destroy(&ms->postcopy_pause_sem); - qemu_sem_destroy(&ms->postcopy_pause_rp_sem); qemu_sem_destroy(&ms->rp_state.rp_sem); qemu_sem_destroy(&ms->rp_state.rp_pong_acks); qemu_sem_destroy(&ms->postcopy_qemufile_src_sem); @@ -3361,7 +3332,6 @@ static void migration_instance_init(Object *obj) migrate_params_init(&ms->parameters); qemu_sem_init(&ms->postcopy_pause_sem, 0); - qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); qemu_sem_init(&ms->rp_state.rp_sem, 0); qemu_sem_init(&ms->rp_state.rp_pong_acks, 0); qemu_sem_init(&ms->rate_limit_sem, 0); |