diff options
Diffstat (limited to 'migration')
| -rw-r--r-- | migration/cpr-transfer.c | 71 | ||||
| -rw-r--r-- | migration/cpr.c | 224 | ||||
| -rw-r--r-- | migration/meson.build | 2 | ||||
| -rw-r--r-- | migration/migration.c | 348 | ||||
| -rw-r--r-- | migration/migration.h | 5 | ||||
| -rw-r--r-- | migration/options.c | 8 | ||||
| -rw-r--r-- | migration/qemu-file.c | 84 | ||||
| -rw-r--r-- | migration/qemu-file.h | 2 | ||||
| -rw-r--r-- | migration/ram.c | 67 | ||||
| -rw-r--r-- | migration/savevm.c | 116 | ||||
| -rw-r--r-- | migration/savevm.h | 6 | ||||
| -rw-r--r-- | migration/trace-events | 13 | ||||
| -rw-r--r-- | migration/vmstate-types.c | 24 | ||||
| -rw-r--r-- | migration/vmstate.c | 6 |
14 files changed, 760 insertions, 216 deletions
diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c new file mode 100644 index 0000000000..e1f140359c --- /dev/null +++ b/migration/cpr-transfer.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2022, 2024 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "io/channel-file.h" +#include "io/channel-socket.h" +#include "io/net-listener.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/savevm.h" +#include "migration/qemu-file.h" +#include "migration/vmstate.h" +#include "trace.h" + +QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp) +{ + MigrationAddress *addr = channel->addr; + + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET && + addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX) { + + g_autoptr(QIOChannelSocket) sioc = qio_channel_socket_new(); + QIOChannel *ioc = QIO_CHANNEL(sioc); + SocketAddress *saddr = &addr->u.socket; + + if (qio_channel_socket_connect_sync(sioc, saddr, errp) < 0) { + return NULL; + } + trace_cpr_transfer_output(addr->u.socket.u.q_unix.path); + qio_channel_set_name(ioc, "cpr-out"); + return qemu_file_new_output(ioc); + + } else { + error_setg(errp, "bad cpr channel address; must be unix"); + return NULL; + } +} + +QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp) +{ + MigrationAddress *addr = channel->addr; + + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET && + addr->u.socket.type == SOCKET_ADDRESS_TYPE_UNIX) { + + g_autoptr(QIOChannelSocket) sioc = NULL; + SocketAddress *saddr = &addr->u.socket; + g_autoptr(QIONetListener) listener = qio_net_listener_new(); + QIOChannel *ioc; + + qio_net_listener_set_name(listener, "cpr-socket-listener"); + if (qio_net_listener_open_sync(listener, saddr, 1, errp) < 0) { + return NULL; + } + + sioc = qio_net_listener_wait_client(listener); + ioc = QIO_CHANNEL(sioc); + trace_cpr_transfer_input(addr->u.socket.u.q_unix.path); + qio_channel_set_name(ioc, "cpr-in"); + return qemu_file_new_input(ioc); + + } else { + error_setg(errp, "bad cpr channel socket type; must be unix"); + return NULL; + } +} diff --git a/migration/cpr.c b/migration/cpr.c new file mode 100644 index 0000000000..584b0b98f7 --- /dev/null +++ b/migration/cpr.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2021-2024 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "migration/cpr.h" +#include "migration/misc.h" +#include "migration/options.h" +#include "migration/qemu-file.h" +#include "migration/savevm.h" +#include "migration/vmstate.h" +#include "system/runstate.h" +#include "trace.h" + +/*************************************************************************/ +/* cpr state container for all information to be saved. */ + +typedef QLIST_HEAD(CprFdList, CprFd) CprFdList; + +typedef struct CprState { + CprFdList fds; +} CprState; + +static CprState cpr_state; + +/****************************************************************************/ + +typedef struct CprFd { + char *name; + unsigned int namelen; + int id; + int fd; + QLIST_ENTRY(CprFd) next; +} CprFd; + +static const VMStateDescription vmstate_cpr_fd = { + .name = "cpr fd", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(namelen, CprFd), + VMSTATE_VBUFFER_ALLOC_UINT32(name, CprFd, 0, NULL, namelen), + VMSTATE_INT32(id, CprFd), + VMSTATE_FD(fd, CprFd), + VMSTATE_END_OF_LIST() + } +}; + +void cpr_save_fd(const char *name, int id, int fd) +{ + CprFd *elem = g_new0(CprFd, 1); + + trace_cpr_save_fd(name, id, fd); + elem->name = g_strdup(name); + elem->namelen = strlen(name) + 1; + elem->id = id; + elem->fd = fd; + QLIST_INSERT_HEAD(&cpr_state.fds, elem, next); +} + +static CprFd *find_fd(CprFdList *head, const char *name, int id) +{ + CprFd *elem; + + QLIST_FOREACH(elem, head, next) { + if (!strcmp(elem->name, name) && elem->id == id) { + return elem; + } + } + return NULL; +} + +void cpr_delete_fd(const char *name, int id) +{ + CprFd *elem = find_fd(&cpr_state.fds, name, id); + + if (elem) { + QLIST_REMOVE(elem, next); + g_free(elem->name); + g_free(elem); + } + + trace_cpr_delete_fd(name, id); +} + +int cpr_find_fd(const char *name, int id) +{ + CprFd *elem = find_fd(&cpr_state.fds, name, id); + int fd = elem ? elem->fd : -1; + + trace_cpr_find_fd(name, id, fd); + return fd; +} +/*************************************************************************/ +#define CPR_STATE "CprState" + +static const VMStateDescription vmstate_cpr_state = { + .name = CPR_STATE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_QLIST_V(fds, CprState, 1, vmstate_cpr_fd, CprFd, next), + VMSTATE_END_OF_LIST() + } +}; +/*************************************************************************/ + +static QEMUFile *cpr_state_file; + +QIOChannel *cpr_state_ioc(void) +{ + return qemu_file_get_ioc(cpr_state_file); +} + +static MigMode incoming_mode = MIG_MODE_NONE; + +MigMode cpr_get_incoming_mode(void) +{ + return incoming_mode; +} + +void cpr_set_incoming_mode(MigMode mode) +{ + incoming_mode = mode; +} + +int cpr_state_save(MigrationChannel *channel, Error **errp) +{ + int ret; + QEMUFile *f; + MigMode mode = migrate_mode(); + + trace_cpr_state_save(MigMode_str(mode)); + + if (mode == MIG_MODE_CPR_TRANSFER) { + f = cpr_transfer_output(channel, errp); + } else { + return 0; + } + if (!f) { + return -1; + } + + qemu_put_be32(f, QEMU_CPR_FILE_MAGIC); + qemu_put_be32(f, QEMU_CPR_FILE_VERSION); + + ret = vmstate_save_state(f, &vmstate_cpr_state, &cpr_state, 0); + if (ret) { + error_setg(errp, "vmstate_save_state error %d", ret); + qemu_fclose(f); + return ret; + } + + /* + * Close the socket only partially so we can later detect when the other + * end closes by getting a HUP event. + */ + qemu_fflush(f); + qio_channel_shutdown(qemu_file_get_ioc(f), QIO_CHANNEL_SHUTDOWN_WRITE, + NULL); + cpr_state_file = f; + return 0; +} + +int cpr_state_load(MigrationChannel *channel, Error **errp) +{ + int ret; + uint32_t v; + QEMUFile *f; + MigMode mode = 0; + + if (channel) { + mode = MIG_MODE_CPR_TRANSFER; + cpr_set_incoming_mode(mode); + f = cpr_transfer_input(channel, errp); + } else { + return 0; + } + if (!f) { + return -1; + } + + trace_cpr_state_load(MigMode_str(mode)); + + v = qemu_get_be32(f); + if (v != QEMU_CPR_FILE_MAGIC) { + error_setg(errp, "Not a migration stream (bad magic %x)", v); + qemu_fclose(f); + return -EINVAL; + } + v = qemu_get_be32(f); + if (v != QEMU_CPR_FILE_VERSION) { + error_setg(errp, "Unsupported migration stream version %d", v); + qemu_fclose(f); + return -ENOTSUP; + } + + ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1); + if (ret) { + error_setg(errp, "vmstate_load_state error %d", ret); + qemu_fclose(f); + return ret; + } + + /* + * Let the caller decide when to close the socket (and generate a HUP event + * for the sending side). + */ + cpr_state_file = f; + + return ret; +} + +void cpr_state_close(void) +{ + if (cpr_state_file) { + qemu_fclose(cpr_state_file); + cpr_state_file = NULL; + } +} diff --git a/migration/meson.build b/migration/meson.build index dac687ee3a..d3bfe84d62 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -14,6 +14,8 @@ system_ss.add(files( 'block-active.c', 'channel.c', 'channel-block.c', + 'cpr.c', + 'cpr-transfer.c', 'cpu-throttle.c', 'dirtyrate.c', 'exec.c', diff --git a/migration/migration.c b/migration/migration.c index 2d1da917c7..74c50cc72c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -14,6 +14,7 @@ */ #include "qemu/osdep.h" +#include "qemu/ctype.h" #include "qemu/cutils.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -27,6 +28,7 @@ #include "system/cpu-throttle.h" #include "rdma.h" #include "ram.h" +#include "migration/cpr.h" #include "migration/global_state.h" #include "migration/misc.h" #include "migration.h" @@ -75,6 +77,7 @@ static NotifierWithReturnList migration_state_notifiers[] = { NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_NORMAL), NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_REBOOT), + NOTIFIER_ELEM_INIT(migration_state_notifiers, MIG_MODE_CPR_TRANSFER), }; /* Messages sent on the return path from destination to source */ @@ -102,12 +105,11 @@ static MigrationIncomingState *current_incoming; static GSList *migration_blockers[MIG_MODE__MAX]; static bool migration_object_check(MigrationState *ms, Error **errp); -static int migration_maybe_pause(MigrationState *s, - int *current_active_state, - int new_state); +static bool migration_switchover_start(MigrationState *s, Error **errp); static void migrate_fd_cancel(MigrationState *s); static bool close_return_path_on_source(MigrationState *s); static void migration_completion_end(MigrationState *s); +static void migrate_hup_delete(MigrationState *s); static void migration_downtime_start(MigrationState *s) { @@ -125,9 +127,19 @@ static void migration_downtime_end(MigrationState *s) */ if (!s->downtime) { s->downtime = now - s->downtime_start; + trace_vmstate_downtime_checkpoint("src-downtime-end"); + } +} + +static void precopy_notify_complete(void) +{ + Error *local_err = NULL; + + if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) { + error_report_err(local_err); } - trace_vmstate_downtime_checkpoint("src-downtime-end"); + trace_migration_precopy_complete(); } static bool migration_needs_multiple_sockets(void) @@ -218,6 +230,12 @@ migration_channels_and_transport_compatible(MigrationAddress *addr, return false; } + if (migrate_mode() == MIG_MODE_CPR_TRANSFER && + addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + error_setg(errp, "Migration requires streamable transport (eg unix)"); + return false; + } + return true; } @@ -433,6 +451,7 @@ void migration_incoming_state_destroy(void) mis->postcopy_qemufile_dst = NULL; } + cpr_set_incoming_mode(MIG_MODE_NONE); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } @@ -586,6 +605,16 @@ void migrate_add_address(SocketAddress *address) QAPI_CLONE(SocketAddress, address)); } +bool migrate_is_uri(const char *uri) +{ + while (*uri && *uri != ':') { + if (!qemu_isalpha(*uri++)) { + return false; + } + } + return *uri == ':'; +} + bool migrate_uri_parse(const char *uri, MigrationChannel **channel, Error **errp) { @@ -683,7 +712,8 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, if (channels) { /* To verify that Migrate channel list has only item */ if (channels->next) { - error_setg(errp, "Channel list has more than one entries"); + error_setg(errp, "Channel list must have only one entry, " + "for type 'main'"); return; } addr = channels->value->addr; @@ -734,6 +764,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels, } else { error_setg(errp, "unknown migration protocol: %s", uri); } + + /* Close cpr socket to tell source that we are listening */ + cpr_state_close(); } static void process_incoming_migration_bh(void *opaque) @@ -1397,6 +1430,11 @@ void migrate_set_state(MigrationStatus *state, MigrationStatus old_state, } } +static void migration_cleanup_json_writer(MigrationState *s) +{ + g_clear_pointer(&s->vmdesc, json_writer_free); +} + static void migrate_fd_cleanup(MigrationState *s) { MigrationEventType type; @@ -1404,12 +1442,14 @@ static void migrate_fd_cleanup(MigrationState *s) trace_migrate_fd_cleanup(); + migration_cleanup_json_writer(s); + g_free(s->hostname); s->hostname = NULL; - json_writer_free(s->vmdesc); - s->vmdesc = NULL; qemu_savevm_state_cleanup(); + cpr_state_close(); + migrate_hup_delete(s); close_return_path_on_source(s); @@ -1521,6 +1561,7 @@ static void migrate_fd_error(MigrationState *s, const Error *error) static void migrate_fd_cancel(MigrationState *s) { int old_state ; + bool setup = (s->state == MIGRATION_STATUS_SETUP); trace_migrate_fd_cancel(); @@ -1555,6 +1596,17 @@ static void migrate_fd_cancel(MigrationState *s) } } } + + /* + * If qmp_migrate_finish has not been called, then there is no path that + * will complete the cancellation. Do it now. + */ + if (setup && !s->to_dst_file) { + migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, + MIGRATION_STATUS_CANCELLED); + cpr_state_close(); + migrate_hup_delete(s); + } } void migration_add_notifier_mode(NotifierWithReturn *notify, @@ -1652,7 +1704,9 @@ bool migration_thread_is_self(void) bool migrate_mode_is_cpr(MigrationState *s) { - return s->parameters.mode == MIG_MODE_CPR_REBOOT; + MigMode mode = s->parameters.mode; + return mode == MIG_MODE_CPR_REBOOT || + mode == MIG_MODE_CPR_TRANSFER; } int migrate_init(MigrationState *s, Error **errp) @@ -1681,7 +1735,10 @@ int migrate_init(MigrationState *s, Error **errp) s->migration_thread_running = false; error_free(s->error); s->error = NULL; - s->vmdesc = NULL; + + if (should_send_vmdesc()) { + s->vmdesc = json_writer_new(false); + } migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); @@ -2033,6 +2090,40 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp) return true; } +static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, + Error **errp); + +static void migrate_hup_add(MigrationState *s, QIOChannel *ioc, GSourceFunc cb, + void *opaque) +{ + s->hup_source = qio_channel_create_watch(ioc, G_IO_HUP); + g_source_set_callback(s->hup_source, cb, opaque, NULL); + g_source_attach(s->hup_source, NULL); +} + +static void migrate_hup_delete(MigrationState *s) +{ + if (s->hup_source) { + g_source_destroy(s->hup_source); + g_source_unref(s->hup_source); + s->hup_source = NULL; + } +} + +static gboolean qmp_migrate_finish_cb(QIOChannel *channel, + GIOCondition cond, + void *opaque) +{ + MigrationAddress *addr = opaque; + + qmp_migrate_finish(addr, false, NULL); + + cpr_state_close(); + migrate_hup_delete(migrate_get_current()); + qapi_free_MigrationAddress(addr); + return G_SOURCE_REMOVE; +} + void qmp_migrate(const char *uri, bool has_channels, MigrationChannelList *channels, bool has_detach, bool detach, bool has_resume, bool resume, Error **errp) @@ -2042,6 +2133,8 @@ void qmp_migrate(const char *uri, bool has_channels, MigrationState *s = migrate_get_current(); g_autoptr(MigrationChannel) channel = NULL; MigrationAddress *addr = NULL; + MigrationChannel *channelv[MIGRATION_CHANNEL_TYPE__MAX] = { NULL }; + MigrationChannel *cpr_channel = NULL; /* * Having preliminary checks for uri and channel @@ -2052,12 +2145,22 @@ void qmp_migrate(const char *uri, bool has_channels, } if (channels) { - /* To verify that Migrate channel list has only item */ - if (channels->next) { - error_setg(errp, "Channel list has more than one entries"); + for ( ; channels; channels = channels->next) { + MigrationChannelType type = channels->value->channel_type; + + if (channelv[type]) { + error_setg(errp, "Channel list has more than one %s entry", + MigrationChannelType_str(type)); + return; + } + channelv[type] = channels->value; + } + cpr_channel = channelv[MIGRATION_CHANNEL_TYPE_CPR]; + addr = channelv[MIGRATION_CHANNEL_TYPE_MAIN]->addr; + if (!addr) { + error_setg(errp, "Channel list has no main entry"); return; } - addr = channels->value->addr; } if (uri) { @@ -2073,12 +2176,52 @@ void qmp_migrate(const char *uri, bool has_channels, return; } + if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) { + error_setg(errp, "missing 'cpr' migration channel"); + return; + } + resume_requested = has_resume && resume; if (!migrate_prepare(s, resume_requested, errp)) { /* Error detected, put into errp */ return; } + if (cpr_state_save(cpr_channel, &local_err)) { + goto out; + } + + /* + * For cpr-transfer, the target may not be listening yet on the migration + * channel, because first it must finish cpr_load_state. The target tells + * us it is listening by closing the cpr-state socket. Wait for that HUP + * event before connecting in qmp_migrate_finish. + * + * The HUP could occur because the target fails while reading CPR state, + * in which case the target will not listen for the incoming migration + * connection, so qmp_migrate_finish will fail to connect, and then recover. + */ + if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) { + migrate_hup_add(s, cpr_state_ioc(), (GSourceFunc)qmp_migrate_finish_cb, + QAPI_CLONE(MigrationAddress, addr)); + + } else { + qmp_migrate_finish(addr, resume_requested, errp); + } + +out: + if (local_err) { + migrate_fd_error(s, local_err); + error_propagate(errp, local_err); + } +} + +static void qmp_migrate_finish(MigrationAddress *addr, bool resume_requested, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + Error *local_err = NULL; + if (!resume_requested) { if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { return; @@ -2495,8 +2638,14 @@ static int postcopy_start(MigrationState *ms, Error **errp) int ret; QIOChannelBuffer *bioc; QEMUFile *fb; - uint64_t bandwidth = migrate_max_postcopy_bandwidth(); - int cur_state = MIGRATION_STATUS_ACTIVE; + + /* + * Now we're 100% sure to switch to postcopy, so JSON writer won't be + * useful anymore. Free the resources early if it is there. Clearing + * the vmdesc also means any follow up vmstate_save()s will start to + * skip all JSON operations, which can shrink postcopy downtime. + */ + migration_cleanup_json_writer(ms); if (migrate_postcopy_preempt()) { migration_wait_main_channel(ms); @@ -2508,11 +2657,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) } } - if (!migrate_pause_before_switchover()) { - migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_POSTCOPY_ACTIVE); - } - trace_postcopy_start(); bql_lock(); trace_postcopy_start_set_run(); @@ -2523,16 +2667,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) goto fail; } - ret = migration_maybe_pause(ms, &cur_state, - MIGRATION_STATUS_POSTCOPY_ACTIVE); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()", - __func__); - goto fail; - } - - if (!migration_block_inactivate()) { - error_setg(errp, "%s: Failed in bdrv_inactivate_all()", __func__); + if (!migration_switchover_start(ms, errp)) { goto fail; } @@ -2540,7 +2675,11 @@ static int postcopy_start(MigrationState *ms, Error **errp) * Cause any non-postcopiable, but iterative devices to * send out their final data. */ - qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false); + ret = qemu_savevm_state_complete_precopy_iterable(ms->to_dst_file, true); + if (ret) { + error_setg(errp, "Postcopy save non-postcopiable iterables failed"); + goto fail; + } /* * in Finish migrate and with the io-lock held everything should @@ -2552,12 +2691,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) ram_postcopy_send_discard_bitmap(ms); } - /* - * send rest of state - note things that are doing postcopy - * will notice we're in POSTCOPY_ACTIVE and not actually - * wrap their state up here - */ - migration_rate_set(bandwidth); if (migrate_postcopy_ram()) { /* Ping just for debugging, helps line traces up */ qemu_savevm_send_ping(ms->to_dst_file, 2); @@ -2585,7 +2718,12 @@ static int postcopy_start(MigrationState *ms, Error **errp) */ qemu_savevm_send_postcopy_listen(fb); - qemu_savevm_state_complete_precopy(fb, false, false); + ret = qemu_savevm_state_complete_precopy_non_iterable(fb, true); + if (ret) { + error_setg(errp, "Postcopy save non-iterable device states failed"); + goto fail_closefb; + } + if (migrate_postcopy_ram()) { qemu_savevm_send_ping(fb, 3); } @@ -2619,8 +2757,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) migration_downtime_end(ms); - bql_unlock(); - if (migrate_postcopy_ram()) { /* * Although this ping is just for debug, it could potentially be @@ -2636,11 +2772,22 @@ static int postcopy_start(MigrationState *ms, Error **errp) ret = qemu_file_get_error(ms->to_dst_file); if (ret) { error_setg_errno(errp, -ret, "postcopy_start: Migration stream error"); - bql_lock(); goto fail; } trace_postcopy_preempt_enabled(migrate_postcopy_preempt()); + /* + * Now postcopy officially started, switch to postcopy bandwidth that + * user specified. + */ + migration_rate_set(migrate_max_postcopy_bandwidth()); + + /* Now, switchover looks all fine, switching to postcopy-active */ + migrate_set_state(&ms->state, MIGRATION_STATUS_DEVICE, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + + bql_unlock(); + return ret; fail_closefb: @@ -2655,16 +2802,39 @@ fail: } /** - * migration_maybe_pause: Pause if required to by - * migrate_pause_before_switchover called with the BQL locked - * Returns: 0 on success + * @migration_switchover_prepare: Start VM switchover procedure + * + * @s: The migration state object pointer + * + * Prepares for the switchover, depending on "pause-before-switchover" + * capability. + * + * If cap set, state machine goes like: + * [postcopy-]active -> pre-switchover -> device + * + * If cap not set: + * [postcopy-]active -> device + * + * Returns: true on success, false on interruptions. */ -static int migration_maybe_pause(MigrationState *s, - int *current_active_state, - int new_state) +static bool migration_switchover_prepare(MigrationState *s) { + /* Concurrent cancellation? Quit */ + if (s->state == MIGRATION_STATUS_CANCELLING) { + return false; + } + + /* + * No matter precopy or postcopy, since we still hold BQL it must not + * change concurrently to CANCELLING, so it must be either ACTIVE or + * POSTCOPY_ACTIVE. + */ + assert(migration_is_active()); + + /* If the pre stage not requested, directly switch to DEVICE */ if (!migrate_pause_before_switchover()) { - return 0; + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_DEVICE); + return true; } /* Since leaving this state is not atomic with posting the semaphore @@ -2677,28 +2847,53 @@ static int migration_maybe_pause(MigrationState *s, /* This block intentionally left blank */ } + /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */ + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER); + bql_unlock(); + + qemu_sem_wait(&s->pause_sem); + + bql_lock(); /* - * If the migration is cancelled when it is in the completion phase, - * the migration state is set to MIGRATION_STATUS_CANCELLING. - * So we don't need to wait a semaphore, otherwise we would always - * wait for the 'pause_sem' semaphore. + * After BQL released and retaken, the state can be CANCELLING if it + * happend during sem_wait().. Only change the state if it's still + * pre-switchover. */ - if (s->state != MIGRATION_STATUS_CANCELLING) { - bql_unlock(); - migrate_set_state(&s->state, *current_active_state, - MIGRATION_STATUS_PRE_SWITCHOVER); - qemu_sem_wait(&s->pause_sem); - migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, - new_state); - *current_active_state = new_state; - bql_lock(); + migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, + MIGRATION_STATUS_DEVICE); + + return s->state == MIGRATION_STATUS_DEVICE; +} + +static bool migration_switchover_start(MigrationState *s, Error **errp) +{ + ERRP_GUARD(); + + if (!migration_switchover_prepare(s)) { + error_setg(errp, "Switchover is interrupted"); + return false; } - return s->state == new_state ? 0 : -EINVAL; + /* Inactivate disks except in COLO */ + if (!migrate_colo()) { + /* + * Inactivate before sending QEMU_VM_EOF so that the + * bdrv_activate_all() on the other end won't fail. + */ + if (!migration_block_inactivate()) { + error_setg(errp, "Block inactivate failed during switchover"); + return false; + } + } + + migration_rate_set(RATE_LIMIT_DISABLED); + + precopy_notify_complete(); + + return true; } -static int migration_completion_precopy(MigrationState *s, - int *current_active_state) +static int migration_completion_precopy(MigrationState *s) { int ret; @@ -2711,17 +2906,12 @@ static int migration_completion_precopy(MigrationState *s, } } - ret = migration_maybe_pause(s, current_active_state, - MIGRATION_STATUS_DEVICE); - if (ret < 0) { + if (!migration_switchover_start(s, NULL)) { + ret = -EFAULT; goto out_unlock; } - migration_rate_set(RATE_LIMIT_DISABLED); - - /* Inactivate disks except in COLO */ - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - !migrate_colo()); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false); out_unlock: bql_unlock(); return ret; @@ -2755,11 +2945,10 @@ static void migration_completion_postcopy(MigrationState *s) static void migration_completion(MigrationState *s) { int ret = 0; - int current_active_state = s->state; Error *local_err = NULL; if (s->state == MIGRATION_STATUS_ACTIVE) { - ret = migration_completion_precopy(s, ¤t_active_state); + ret = migration_completion_precopy(s); } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { migration_completion_postcopy(s); } else { @@ -2799,8 +2988,7 @@ fail: error_free(local_err); } - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_FAILED); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); } /** @@ -3597,12 +3785,8 @@ static void *bg_migration_thread(void *opaque) if (migration_stop_vm(s, RUN_STATE_PAUSED)) { goto fail; } - /* - * Put vCPUs in sync with shadow context structures, then - * save their state to channel-buffer along with devices. - */ - cpu_synchronize_all_states(); - if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) { + + if (qemu_savevm_state_complete_precopy_non_iterable(fb, false)) { goto fail; } /* diff --git a/migration/migration.h b/migration/migration.h index 0df2a187af..4c1fafc2b5 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -468,6 +468,8 @@ struct MigrationState { bool switchover_acked; /* Is this a rdma migration */ bool rdma_migration; + + GSource *hup_source; }; void migrate_set_state(MigrationStatus *state, MigrationStatus old_state, @@ -519,8 +521,6 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, Error **errp); void migrate_add_address(SocketAddress *address); -bool migrate_uri_parse(const char *uri, MigrationChannel **channel, - Error **errp); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); #define qemu_ram_foreach_block \ @@ -552,6 +552,7 @@ void migration_bitmap_sync_precopy(bool last_stage); /* migration/block-dirty-bitmap.c */ void dirty_bitmap_mig_init(void); +bool should_send_vmdesc(void); /* migration/block-active.c */ void migration_block_active_setup(bool active); diff --git a/migration/options.c b/migration/options.c index b8d5300326..1ad950e397 100644 --- a/migration/options.c +++ b/migration/options.c @@ -22,6 +22,7 @@ #include "qapi/qmp/qnull.h" #include "system/runstate.h" #include "migration/colo.h" +#include "migration/cpr.h" #include "migration/misc.h" #include "migration.h" #include "migration-stats.h" @@ -745,8 +746,11 @@ uint64_t migrate_max_postcopy_bandwidth(void) MigMode migrate_mode(void) { - MigrationState *s = migrate_get_current(); - MigMode mode = s->parameters.mode; + MigMode mode = cpr_get_incoming_mode(); + + if (mode == MIG_MODE_NONE) { + mode = migrate_get_current()->parameters.mode; + } assert(mode >= 0 && mode < MIG_MODE__MAX); return mode; diff --git a/migration/qemu-file.c b/migration/qemu-file.c index b6d2f588bd..1303a5bf58 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -37,6 +37,11 @@ #define IO_BUF_SIZE 32768 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64) +typedef struct FdEntry { + QTAILQ_ENTRY(FdEntry) entry; + int fd; +} FdEntry; + struct QEMUFile { QIOChannel *ioc; bool is_writable; @@ -51,6 +56,9 @@ struct QEMUFile { int last_error; Error *last_error_obj; + + bool can_pass_fd; + QTAILQ_HEAD(, FdEntry) fds; }; /* @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable) object_ref(ioc); f->ioc = ioc; f->is_writable = is_writable; + f->can_pass_fd = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS); + QTAILQ_INIT(&f->fds); return f; } @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) int len; int pending; Error *local_error = NULL; + g_autofree int *fds = NULL; + size_t nfd = 0; + int **pfds = f->can_pass_fd ? &fds : NULL; + size_t *pnfd = f->can_pass_fd ? &nfd : NULL; assert(!qemu_file_is_writable(f)); @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) } do { - len = qio_channel_read(f->ioc, - (char *)f->buf + pending, - IO_BUF_SIZE - pending, - &local_error); + struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending }; + len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0, + &local_error); if (len == QIO_CHANNEL_ERR_BLOCK) { if (qemu_in_coroutine()) { qio_channel_yield(f->ioc, G_IO_IN); @@ -348,9 +361,66 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) qemu_file_set_error_obj(f, len, local_error); } + for (int i = 0; i < nfd; i++) { + FdEntry *fde = g_new0(FdEntry, 1); + fde->fd = fds[i]; + QTAILQ_INSERT_TAIL(&f->fds, fde, entry); + } + return len; } +int qemu_file_put_fd(QEMUFile *f, int fd) +{ + int ret = 0; + QIOChannel *ioc = qemu_file_get_ioc(f); + Error *err = NULL; + struct iovec iov = { (void *)" ", 1 }; + + /* + * Send a dummy byte so qemu_fill_buffer on the receiving side does not + * fail with a len=0 error. Flush first to maintain ordering wrt other + * data. + */ + + qemu_fflush(f); + if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) { + error_report_err(error_copy(err)); + qemu_file_set_error_obj(f, -EIO, err); + ret = -1; + } + trace_qemu_file_put_fd(f->ioc->name, fd, ret); + return ret; +} + +int qemu_file_get_fd(QEMUFile *f) +{ + int fd = -1; + FdEntry *fde; + + if (!f->can_pass_fd) { + Error *err = NULL; + error_setg(&err, "%s does not support fd passing", f->ioc->name); + error_report_err(error_copy(err)); + qemu_file_set_error_obj(f, -EIO, err); + goto out; + } + + /* Force the dummy byte and its fd passenger to appear. */ + qemu_peek_byte(f, 0); + + fde = QTAILQ_FIRST(&f->fds); + if (fde) { + qemu_get_byte(f); /* Drop the dummy byte */ + fd = fde->fd; + QTAILQ_REMOVE(&f->fds, fde, entry); + g_free(fde); + } +out: + trace_qemu_file_get_fd(f->ioc->name, fd); + return fd; +} + /** Closes the file * * Returns negative error value if any error happened on previous operations or @@ -361,11 +431,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) */ int qemu_fclose(QEMUFile *f) { + FdEntry *fde, *next; int ret = qemu_fflush(f); int ret2 = qio_channel_close(f->ioc, NULL); if (ret >= 0) { ret = ret2; } + QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) { + warn_report("qemu_fclose: received fd %d was never claimed", fde->fd); + close(fde->fd); + g_free(fde); + } g_clear_pointer(&f->ioc, object_unref); error_free(f->last_error_obj); g_free(f); diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 11c2120edd..3e47a20621 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, off_t pos); QIOChannel *qemu_file_get_ioc(QEMUFile *file); +int qemu_file_put_fd(QEMUFile *f, int fd); +int qemu_file_get_fd(QEMUFile *f); #endif diff --git a/migration/ram.c b/migration/ram.c index ce28328141..6f460fd22d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -195,7 +195,9 @@ static bool postcopy_preempt_active(void) bool migrate_ram_is_ignored(RAMBlock *block) { + MigMode mode = migrate_mode(); return !qemu_ram_is_migratable(block) || + mode == MIG_MODE_CPR_TRANSFER || (migrate_ignore_shared() && qemu_ram_is_shared(block) && qemu_ram_is_named_file(block)); } @@ -446,13 +448,6 @@ void ram_transferred_add(uint64_t bytes) } } -struct MigrationOps { - int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); -}; -typedef struct MigrationOps MigrationOps; - -MigrationOps *migration_ops; - static int ram_save_host_page_urgent(PageSearchStatus *pss); /* NOTE: page is the PFN not real ram_addr_t. */ @@ -1958,53 +1953,34 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len, } /** - * ram_save_target_page_legacy: save one target page - * - * Returns the number of pages written + * ram_save_target_page: save one target page to the precopy thread + * OR to multifd workers. * * @rs: current RAM state * @pss: data about the page we want to send */ -static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) +static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) { ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; - if (control_save_page(pss, offset, &res)) { - return res; + if (!migrate_multifd() + || migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) { + if (save_zero_page(rs, pss, offset)) { + return 1; + } } - if (save_zero_page(rs, pss, offset)) { - return 1; + if (migrate_multifd()) { + RAMBlock *block = pss->block; + return ram_save_multifd_page(block, offset); } - return ram_save_page(rs, pss); -} - -/** - * ram_save_target_page_multifd: send one target page to multifd workers - * - * Returns 1 if the page was queued, -1 otherwise. - * - * @rs: current RAM state - * @pss: data about the page we want to send - */ -static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss) -{ - RAMBlock *block = pss->block; - ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; - - /* - * While using multifd live migration, we still need to handle zero - * page checking on the migration main thread. - */ - if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) { - if (save_zero_page(rs, pss, offset)) { - return 1; - } + if (control_save_page(pss, offset, &res)) { + return res; } - return ram_save_multifd_page(block, offset); + return ram_save_page(rs, pss); } /* Should be called before sending a host page */ @@ -2093,7 +2069,7 @@ static int ram_save_host_page_urgent(PageSearchStatus *pss) if (page_dirty) { /* Be strict to return code; it must be 1, or what else? */ - if (migration_ops->ram_save_target_page(rs, pss) != 1) { + if (ram_save_target_page(rs, pss) != 1) { error_report_once("%s: ram_save_target_page failed", __func__); ret = -1; goto out; @@ -2162,7 +2138,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) if (preempt_active) { qemu_mutex_unlock(&rs->bitmap_mutex); } - tmppages = migration_ops->ram_save_target_page(rs, pss); + tmppages = ram_save_target_page(rs, pss); if (tmppages >= 0) { pages += tmppages; /* @@ -2360,8 +2336,6 @@ static void ram_save_cleanup(void *opaque) xbzrle_cleanup(); multifd_ram_save_cleanup(); ram_state_cleanup(rsp); - g_free(migration_ops); - migration_ops = NULL; } static void ram_state_reset(RAMState *rs) @@ -3027,13 +3001,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp) return ret; } - migration_ops = g_malloc0(sizeof(MigrationOps)); - if (migrate_multifd()) { multifd_ram_save_setup(); - migration_ops->ram_save_target_page = ram_save_target_page_multifd; - } else { - migration_ops->ram_save_target_page = ram_save_target_page_legacy; } /* diff --git a/migration/savevm.c b/migration/savevm.c index c929da1ca5..bc375db282 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1231,8 +1231,7 @@ void qemu_savevm_non_migratable_list(strList **reasons) void qemu_savevm_state_header(QEMUFile *f) { MigrationState *s = migrate_get_current(); - - s->vmdesc = json_writer_new(false); + JSONWriter *vmdesc = s->vmdesc; trace_savevm_state_header(); qemu_put_be32(f, QEMU_VM_FILE_MAGIC); @@ -1241,16 +1240,21 @@ void qemu_savevm_state_header(QEMUFile *f) if (s->send_configuration) { qemu_put_byte(f, QEMU_VM_CONFIGURATION); - /* - * This starts the main json object and is paired with the - * json_writer_end_object in - * qemu_savevm_state_complete_precopy_non_iterable - */ - json_writer_start_object(s->vmdesc, NULL); + if (vmdesc) { + /* + * This starts the main json object and is paired with the + * json_writer_end_object in + * qemu_savevm_state_complete_precopy_non_iterable + */ + json_writer_start_object(vmdesc, NULL); + json_writer_start_object(vmdesc, "configuration"); + } + + vmstate_save_state(f, &vmstate_configuration, &savevm_state, vmdesc); - json_writer_start_object(s->vmdesc, "configuration"); - vmstate_save_state(f, &vmstate_configuration, &savevm_state, s->vmdesc); - json_writer_end_object(s->vmdesc); + if (vmdesc) { + json_writer_end_object(vmdesc); + } } } @@ -1296,16 +1300,19 @@ int qemu_savevm_state_setup(QEMUFile *f, Error **errp) { ERRP_GUARD(); MigrationState *ms = migrate_get_current(); + JSONWriter *vmdesc = ms->vmdesc; SaveStateEntry *se; int ret = 0; - json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size()); - json_writer_start_array(ms->vmdesc, "devices"); + if (vmdesc) { + json_writer_int64(vmdesc, "page_size", qemu_target_page_size()); + json_writer_start_array(vmdesc, "devices"); + } trace_savevm_state_setup(); QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->vmsd && se->vmsd->early_setup) { - ret = vmstate_save(f, se, ms->vmdesc, errp); + ret = vmstate_save(f, se, vmdesc, errp); if (ret) { migrate_set_error(ms, *errp); qemu_file_set_error(f, ret); @@ -1424,11 +1431,11 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) return all_finished; } -static bool should_send_vmdesc(void) +bool should_send_vmdesc(void) { MachineState *machine = MACHINE(qdev_get_machine()); - bool in_postcopy = migration_in_postcopy(); - return !machine->suppress_vmdesc && !in_postcopy; + + return !machine->suppress_vmdesc; } /* @@ -1470,7 +1477,6 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f) qemu_fflush(f); } -static int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) { int64_t start_ts_each, end_ts_each; @@ -1514,8 +1520,7 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy) } int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, - bool in_postcopy, - bool inactivate_disks) + bool in_postcopy) { MigrationState *ms = migrate_get_current(); int64_t start_ts_each, end_ts_each; @@ -1525,6 +1530,9 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, Error *local_err = NULL; int ret; + /* Making sure cpu states are synchronized before saving non-iterable */ + cpu_synchronize_all_states(); + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { if (se->vmsd && se->vmsd->early_setup) { /* Already saved during qemu_savevm_state_setup(). */ @@ -1546,77 +1554,42 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, end_ts_each - start_ts_each); } - if (inactivate_disks) { - /* - * Inactivate before sending QEMU_VM_EOF so that the - * bdrv_activate_all() on the other end won't fail. - */ - if (!migration_block_inactivate()) { - error_setg(&local_err, "%s: bdrv_inactivate_all() failed", - __func__); - migrate_set_error(ms, local_err); - error_report_err(local_err); - qemu_file_set_error(f, -EFAULT); - return ret; - } - } if (!in_postcopy) { /* Postcopy stream will still be going */ qemu_put_byte(f, QEMU_VM_EOF); - } - json_writer_end_array(vmdesc); - json_writer_end_object(vmdesc); - vmdesc_len = strlen(json_writer_get(vmdesc)); + if (vmdesc) { + json_writer_end_array(vmdesc); + json_writer_end_object(vmdesc); + vmdesc_len = strlen(json_writer_get(vmdesc)); - if (should_send_vmdesc()) { - qemu_put_byte(f, QEMU_VM_VMDESCRIPTION); - qemu_put_be32(f, vmdesc_len); - qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len); + qemu_put_byte(f, QEMU_VM_VMDESCRIPTION); + qemu_put_be32(f, vmdesc_len); + qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len); + } } - /* Free it now to detect any inconsistencies. */ - json_writer_free(vmdesc); - ms->vmdesc = NULL; - trace_vmstate_downtime_checkpoint("src-non-iterable-saved"); return 0; } -int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, - bool inactivate_disks) +int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) { int ret; - Error *local_err = NULL; - bool in_postcopy = migration_in_postcopy(); - if (precopy_notify(PRECOPY_NOTIFY_COMPLETE, &local_err)) { - error_report_err(local_err); + ret = qemu_savevm_state_complete_precopy_iterable(f, false); + if (ret) { + return ret; } - trace_savevm_state_complete_precopy(); - - cpu_synchronize_all_states(); - - if (!in_postcopy || iterable_only) { - ret = qemu_savevm_state_complete_precopy_iterable(f, in_postcopy); + if (!iterable_only) { + ret = qemu_savevm_state_complete_precopy_non_iterable(f, false); if (ret) { return ret; } } - if (iterable_only) { - goto flush; - } - - ret = qemu_savevm_state_complete_precopy_non_iterable(f, in_postcopy, - inactivate_disks); - if (ret) { - return ret; - } - -flush: return qemu_fflush(f); } @@ -1714,7 +1687,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) ret = qemu_file_get_error(f); if (ret == 0) { - qemu_savevm_state_complete_precopy(f, false, false); + qemu_savevm_state_complete_precopy(f, false); ret = qemu_file_get_error(f); } if (ret != 0) { @@ -1740,7 +1713,7 @@ cleanup: void qemu_savevm_live_state(QEMUFile *f) { /* save QEMU_VM_SECTION_END section */ - qemu_savevm_state_complete_precopy(f, true, false); + qemu_savevm_state_complete_precopy(f, true); qemu_put_byte(f, QEMU_VM_EOF); } @@ -2965,6 +2938,7 @@ int qemu_loadvm_state(QEMUFile *f) return ret; } + /* When reaching here, it must be precopy */ if (ret == 0) { ret = qemu_file_get_error(f); } diff --git a/migration/savevm.h b/migration/savevm.h index 9ec96a995c..7957460062 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -39,12 +39,12 @@ void qemu_savevm_state_header(QEMUFile *f); int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); void qemu_savevm_state_cleanup(void); void qemu_savevm_state_complete_postcopy(QEMUFile *f); -int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only, - bool inactivate_disks); +int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only); void qemu_savevm_state_pending_exact(uint64_t *must_precopy, uint64_t *can_postcopy); void qemu_savevm_state_pending_estimate(uint64_t *must_precopy, uint64_t *can_postcopy); +int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy); void qemu_savevm_send_ping(QEMUFile *f, uint32_t value); void qemu_savevm_send_open_return_path(QEMUFile *f); int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len); @@ -68,6 +68,6 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); int qemu_load_device_state(QEMUFile *f); int qemu_loadvm_approve_switchover(void); int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, - bool in_postcopy, bool inactivate_disks); + bool in_postcopy); #endif diff --git a/migration/trace-events b/migration/trace-events index b82a1c5e40..12b262f8ee 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -44,7 +44,6 @@ savevm_state_resume_prepare(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" savevm_state_cleanup(void) "" -savevm_state_complete_precopy(void) "" vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%d downtime=%"PRIi64 @@ -88,6 +87,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" # qemu-file.c qemu_file_fclose(void) "" +qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d" +qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d" # ram.c get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" @@ -193,6 +194,7 @@ migrate_transferred(uint64_t transferred, uint64_t time_spent, uint64_t bandwidt process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" process_incoming_migration_co_postcopy_end_main(void) "" postcopy_preempt_enabled(bool value) "%d" +migration_precopy_complete(void) "" # migration-stats migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd, uint64_t rdma) "qemu_file %" PRIu64 " multifd %" PRIu64 " RDMA %" PRIu64 @@ -342,6 +344,15 @@ colo_receive_message(const char *msg) "Receive '%s' message" # colo-failover.c colo_failover_set_state(const char *new_state) "new state %s" +# cpr.c +cpr_save_fd(const char *name, int id, int fd) "%s, id %d, fd %d" +cpr_delete_fd(const char *name, int id) "%s, id %d" +cpr_find_fd(const char *name, int id, int fd) "%s, id %d returns %d" +cpr_state_save(const char *mode) "%s mode" +cpr_state_load(const char *mode) "%s mode" +cpr_transfer_input(const char *path) "%s" +cpr_transfer_output(const char *path) "%s" + # block-dirty-bitmap.c send_bitmap_header_enter(void) "" send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64 diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index d70d573dbd..741a588b7e 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -15,6 +15,7 @@ #include "qemu-file.h" #include "migration.h" #include "migration/vmstate.h" +#include "migration/client-options.h" #include "qemu/error-report.h" #include "qemu/queue.h" #include "trace.h" @@ -314,6 +315,29 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +/* File descriptor communicated via SCM_RIGHTS */ + +static int get_fd(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + int32_t *v = pv; + *v = qemu_file_get_fd(f); + return 0; +} + +static int put_fd(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + int32_t *v = pv; + return qemu_file_put_fd(f, *v); +} + +const VMStateInfo vmstate_info_fd = { + .name = "fd", + .get = get_fd, + .put = put_fd, +}; + static int get_nullptr(QEMUFile *f, void *pv, size_t size, const VMStateField *field) diff --git a/migration/vmstate.c b/migration/vmstate.c index 82bd005a83..047a52af89 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -459,6 +459,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, } /* + * This logic only matters when dumping VM Desc. + * * Due to the fake nullptr handling above, if there's mixed * null/non-null data, it doesn't make sense to emit a * compressed array representation spanning the entire array @@ -466,7 +468,7 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, * vs. nullptr). Search ahead for the next null/non-null element * and start a new compressed array if found. */ - if (field->flags & VMS_ARRAY_OF_POINTER && + if (vmdesc && (field->flags & VMS_ARRAY_OF_POINTER) && is_null != is_prev_null) { is_prev_null = is_null; @@ -504,7 +506,7 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, written_bytes); /* If we used a fake temp field.. free it now */ - if (inner_field != field) { + if (is_null) { g_clear_pointer((gpointer *)&inner_field, g_free); } |