diff options
| -rw-r--r-- | include/migration/cpr.h | 2 | ||||
| -rw-r--r-- | migration/cpr-exec.c | 95 | ||||
| -rw-r--r-- | migration/cpr.c | 23 | ||||
| -rw-r--r-- | migration/migration.c | 10 | ||||
| -rw-r--r-- | migration/ram.c | 1 | ||||
| -rw-r--r-- | migration/trace-events | 1 | ||||
| -rw-r--r-- | migration/vmstate-types.c | 8 | ||||
| -rw-r--r-- | qapi/migration.json | 25 | ||||
| -rw-r--r-- | system/vl.c | 4 |
9 files changed, 164 insertions, 5 deletions
diff --git a/include/migration/cpr.h b/include/migration/cpr.h index b84389ff04..a412d6663c 100644 --- a/include/migration/cpr.h +++ b/include/migration/cpr.h @@ -53,9 +53,11 @@ int cpr_get_fd_param(const char *name, const char *fdname, int index, QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp); QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp); +void cpr_exec_init(void); QEMUFile *cpr_exec_output(Error **errp); QEMUFile *cpr_exec_input(Error **errp); void cpr_exec_persist_state(QEMUFile *f); bool cpr_exec_has_state(void); void cpr_exec_unpersist_state(void); +void cpr_exec_unpreserve_fds(void); #endif diff --git a/migration/cpr-exec.c b/migration/cpr-exec.c index 81d84425e1..d57714bc5d 100644 --- a/migration/cpr-exec.c +++ b/migration/cpr-exec.c @@ -6,15 +6,21 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/error-report.h" #include "qemu/memfd.h" #include "qapi/error.h" +#include "qapi/type-helpers.h" #include "io/channel-file.h" #include "io/channel-socket.h" +#include "block/block-global-state.h" +#include "qemu/main-loop.h" #include "migration/cpr.h" #include "migration/qemu-file.h" +#include "migration/migration.h" #include "migration/misc.h" #include "migration/vmstate.h" #include "system/runstate.h" +#include "trace.h" #define CPR_EXEC_STATE_NAME "QEMU_CPR_EXEC_STATE" @@ -97,3 +103,92 @@ QEMUFile *cpr_exec_input(Error **errp) lseek(mfd, 0, SEEK_SET); return qemu_file_new_fd_input(mfd, CPR_EXEC_STATE_NAME); } + +static bool preserve_fd(int fd) +{ + qemu_clear_cloexec(fd); + return true; +} + +static bool unpreserve_fd(int fd) +{ + qemu_set_cloexec(fd); + return true; +} + +static void cpr_exec_preserve_fds(void) +{ + cpr_walk_fd(preserve_fd); +} + +void cpr_exec_unpreserve_fds(void) +{ + cpr_walk_fd(unpreserve_fd); +} + +static void cpr_exec_cb(void *opaque) +{ + MigrationState *s = migrate_get_current(); + char **argv = strv_from_str_list(s->parameters.cpr_exec_command); + Error *err = NULL; + + /* + * Clear the close-on-exec flag for all preserved fd's. We cannot do so + * earlier because they should not persist across miscellaneous fork and + * exec calls that are performed during normal operation. + */ + cpr_exec_preserve_fds(); + + trace_cpr_exec(); + execvp(argv[0], argv); + + /* + * exec should only fail if argv[0] is bogus, or has a permissions problem, + * or the system is very short on resources. + */ + g_strfreev(argv); + cpr_exec_unpreserve_fds(); + + error_setg_errno(&err, errno, "execvp %s failed", argv[0]); + error_report_err(error_copy(err)); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + migrate_set_error(s, err); + + /* Note, we can go from state COMPLETED to FAILED */ + migration_call_notifiers(s, MIG_EVENT_PRECOPY_FAILED, NULL); + + err = NULL; + if (!migration_block_activate(&err)) { + /* error was already reported */ + error_free(err); + return; + } + + if (runstate_is_live(s->vm_old_state)) { + vm_start(); + } +} + +static int cpr_exec_notifier(NotifierWithReturn *notifier, MigrationEvent *e, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + + if (e->type == MIG_EVENT_PRECOPY_DONE) { + QEMUBH *cpr_exec_bh = qemu_bh_new(cpr_exec_cb, NULL); + assert(s->state == MIGRATION_STATUS_COMPLETED); + qemu_bh_schedule(cpr_exec_bh); + qemu_notify_event(); + } else if (e->type == MIG_EVENT_PRECOPY_FAILED) { + cpr_exec_unpersist_state(); + } + return 0; +} + +void cpr_exec_init(void) +{ + static NotifierWithReturn exec_notifier; + + migration_add_notifier_mode(&exec_notifier, cpr_exec_notifier, + MIG_MODE_CPR_EXEC); +} diff --git a/migration/cpr.c b/migration/cpr.c index 6feda78f1b..22dbac7c72 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -6,6 +6,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "hw/vfio/vfio-device.h" @@ -186,6 +187,8 @@ int cpr_state_save(MigrationChannel *channel, Error **errp) if (mode == MIG_MODE_CPR_TRANSFER) { g_assert(channel); f = cpr_transfer_output(channel, errp); + } else if (mode == MIG_MODE_CPR_EXEC) { + f = cpr_exec_output(errp); } else { return 0; } @@ -202,6 +205,10 @@ int cpr_state_save(MigrationChannel *channel, Error **errp) return ret; } + if (migrate_mode() == MIG_MODE_CPR_EXEC) { + cpr_exec_persist_state(f); + } + /* * Close the socket only partially so we can later detect when the other * end closes by getting a HUP event. @@ -220,7 +227,13 @@ int cpr_state_load(MigrationChannel *channel, Error **errp) QEMUFile *f; MigMode mode = 0; - if (channel) { + if (cpr_exec_has_state()) { + mode = MIG_MODE_CPR_EXEC; + f = cpr_exec_input(errp); + if (channel) { + warn_report("ignoring cpr channel for migration mode cpr-exec"); + } + } else if (channel) { mode = MIG_MODE_CPR_TRANSFER; cpr_set_incoming_mode(mode); f = cpr_transfer_input(channel, errp); @@ -232,6 +245,7 @@ int cpr_state_load(MigrationChannel *channel, Error **errp) } trace_cpr_state_load(MigMode_str(mode)); + cpr_set_incoming_mode(mode); v = qemu_get_be32(f); if (v != QEMU_CPR_FILE_MAGIC) { @@ -252,6 +266,11 @@ int cpr_state_load(MigrationChannel *channel, Error **errp) return ret; } + if (migrate_mode() == MIG_MODE_CPR_EXEC) { + /* Set cloexec to prevent fd leaks from fork until the next cpr-exec */ + cpr_exec_unpreserve_fds(); + } + /* * Let the caller decide when to close the socket (and generate a HUP event * for the sending side). @@ -272,7 +291,7 @@ void cpr_state_close(void) bool cpr_incoming_needed(void *opaque) { MigMode mode = migrate_mode(); - return mode == MIG_MODE_CPR_TRANSFER; + return mode == MIG_MODE_CPR_TRANSFER || mode == MIG_MODE_CPR_EXEC; } /* diff --git a/migration/migration.c b/migration/migration.c index a399735f02..a63b46bbef 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -333,6 +333,7 @@ void migration_object_init(void) ram_mig_init(); dirty_bitmap_mig_init(); + cpr_exec_init(); /* Initialize cpu throttle timers */ cpu_throttle_init(); @@ -1807,7 +1808,8 @@ bool migrate_mode_is_cpr(MigrationState *s) { MigMode mode = s->parameters.mode; return mode == MIG_MODE_CPR_REBOOT || - mode == MIG_MODE_CPR_TRANSFER; + mode == MIG_MODE_CPR_TRANSFER || + mode == MIG_MODE_CPR_EXEC; } int migrate_init(MigrationState *s, Error **errp) @@ -2156,6 +2158,12 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp) return false; } + if (migrate_mode() == MIG_MODE_CPR_EXEC && + !s->parameters.has_cpr_exec_command) { + error_setg(errp, "cpr-exec mode requires setting cpr-exec-command"); + return false; + } + if (migration_is_blocked(errp)) { return false; } diff --git a/migration/ram.c b/migration/ram.c index a8e8d2cc67..9aac89638a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -228,6 +228,7 @@ bool migrate_ram_is_ignored(RAMBlock *block) MigMode mode = migrate_mode(); return !qemu_ram_is_migratable(block) || mode == MIG_MODE_CPR_TRANSFER || + mode == MIG_MODE_CPR_EXEC || (migrate_ignore_shared() && qemu_ram_is_shared(block) && qemu_ram_is_named_file(block)); } diff --git a/migration/trace-events b/migration/trace-events index 706db97def..e8edd1fbba 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -354,6 +354,7 @@ cpr_state_save(const char *mode) "%s mode" cpr_state_load(const char *mode) "%s mode" cpr_transfer_input(const char *path) "%s" cpr_transfer_output(const char *path) "%s" +cpr_exec(void) "" # block-dirty-bitmap.c send_bitmap_header_enter(void) "" diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index a1cd7a95fa..4b01dc19c2 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -322,6 +322,10 @@ static int get_fd(QEMUFile *f, void *pv, size_t size, const VMStateField *field) { int32_t *v = pv; + if (migrate_mode() == MIG_MODE_CPR_EXEC) { + qemu_get_sbe32s(f, v); + return 0; + } *v = qemu_file_get_fd(f); return 0; } @@ -330,6 +334,10 @@ static int put_fd(QEMUFile *f, void *pv, size_t size, const VMStateField *field, JSONWriter *vmdesc) { int32_t *v = pv; + if (migrate_mode() == MIG_MODE_CPR_EXEC) { + qemu_put_sbe32s(f, v); + return 0; + } return qemu_file_put_fd(f, *v); } diff --git a/qapi/migration.json b/qapi/migration.json index 2be8fa1d16..be0f3fcc12 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -694,9 +694,32 @@ # until you issue the `migrate-incoming` command. # # (since 10.0) +# +# @cpr-exec: The migrate command stops the VM, saves state to the +# migration channel, directly exec's a new version of QEMU on the +# same host, replacing the original process while retaining its +# PID, and loads state from the channel. Guest RAM is preserved +# in place. Devices and their pinned pages are also preserved for +# VFIO and IOMMUFD. +# +# Old QEMU starts new QEMU by exec'ing the command specified by +# the @cpr-exec-command parameter. The command may be a direct +# invocation of new QEMU, or may be a wrapper that exec's the new +# QEMU binary. +# +# Because old QEMU terminates when new QEMU starts, one cannot +# stream data between the two, so the channel must be a type, +# such as a file, that accepts all data before old QEMU exits. +# Otherwise, old QEMU may quietly block writing to the channel. +# +# Memory-backend objects must have the share=on attribute, but +# memory-backend-epc is not supported. The VM must be started +# with the '-machine aux-ram-share=on' option. +# +# (since 10.2) ## { 'enum': 'MigMode', - 'data': [ 'normal', 'cpr-reboot', 'cpr-transfer' ] } + 'data': [ 'normal', 'cpr-reboot', 'cpr-transfer', 'cpr-exec' ] } ## # @ZeroPageDetection: diff --git a/system/vl.c b/system/vl.c index 00f3694725..646239e4a6 100644 --- a/system/vl.c +++ b/system/vl.c @@ -3837,6 +3837,8 @@ void qemu_init(int argc, char **argv) } qemu_init_displays(); accel_setup_post(current_machine); - os_setup_post(); + if (migrate_mode() != MIG_MODE_CPR_EXEC) { + os_setup_post(); + } resume_mux_open(); } |