summary refs log tree commit diff stats
path: root/migration/migration.c
diff options
context:
space:
mode:
author: Richard Henderson <richard.henderson@linaro.org> 2024-06-21 11:19:25 -0700
committer: Richard Henderson <richard.henderson@linaro.org> 2024-06-21 11:19:25 -0700
commit: ffeddb979400b1580ad28acbee09b6f971c3912d (patch)
tree: b6e6752ff6c864edd312b9f6c15b05886861a1d0 /migration/migration.c
parent: 02d9c38236cf8c9826e5c5be61780c4444cb4ae0 (diff)
parent: 04b09de16d78cf2d163ca65d7c6d161bf2baceb6 (diff)
download: focaccia-qemu-ffeddb979400b1580ad28acbee09b6f971c3912d.tar.gz
download: focaccia-qemu-ffeddb979400b1580ad28acbee09b6f971c3912d.zip
Merge tag 'migration-20240621-pull-request' of https://gitlab.com/farosas/qemu into staging
Migration pull request

- Fabiano's fix for fdset + file migration truncating the migration
  file

- Fabiano's fdset + direct-io support for mapped-ram

- Peter's various cleanups (multifd sync, thread names, migration
  states, tests)

- Peter's new migration state postcopy-recover-setup

- Philippe's unused vmstate macro cleanup

# -----BEGIN PGP SIGNATURE-----
#
# iQJEBAABCAAuFiEEqhtIsKIjJqWkw2TPx5jcdBvsMZ0FAmZ1vIsQHGZhcm9zYXNA
# c3VzZS5kZQAKCRDHmNx0G+wxnVZTEACdFIsQ/PJw2C9eeLNor5B5MNSEqUjxX0KN
# 6s/uTkJ/dcv+2PI92SzRCZ1dpR5e9AyjTFYbLc9tPRBIROEhlUaoc84iyEy0jCFU
# eJ65/RQbH5QHRpOZwbN5RmGwnapfOWHGTn3bpdrmSQTOAy8R2TPGY4SVYR+gamTn
# bAv1cAsrOOBUfCi8aqvSlmvuliOW0lzJdF4XHa3mAaigLoF14JdwUZdyIMP1mLDp
# /fllbHCKCvJ1vprE9hQmptBR9PzveJZOZamIVt96djJr5+C869+9PMCn3a5vxqNW
# b+/LhOZjac37Ecg5kgbq+cO1E4EXKC3zWOmDTw8kHUwp9oYNi1upwLdpHbAAZaQD
# /JmHKsExx9QuV8mrVyGBXMI92E6RrT54b1Bjcuo63gAP8p9JRRxGT22U3LghNbTm
# 1XcGPR3rswjT1yTgE6qAqAIMR+7X5MrJVWop9ub/lF5DQ1VYIwmlKSNdwDHFDhRq
# 0F1k2+EksNpcZ0BH2+3iFml7qKHLVupLQKTWcLdrlnQnTfSG3+yW7eyA5Mte79Qp
# nJPcHt8qBqUVQ9Uf/4490TM4Lrp+T+m16exIi0tISLaDXSVkFJnlowipSm+tQ7U3
# Sm68JWdWWEsXZVaMqJeBE8nA/hCoQDpo4hVdwftStI+NayXbRX/EgvPqrNAvwh+c
# i4AdHdn6hQ==
# =ZX0p
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 21 Jun 2024 10:46:51 AM PDT
# gpg:                using RSA key AA1B48B0A22326A5A4C364CFC798DC741BEC319D
# gpg:                issuer "farosas@suse.de"
# gpg: Good signature from "Fabiano Rosas <farosas@suse.de>" [unknown]
# gpg:                 aka "Fabiano Almeida Rosas <fabiano.rosas@suse.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: AA1B 48B0 A223 26A5 A4C3  64CF C798 DC74 1BEC 319D

* tag 'migration-20240621-pull-request' of https://gitlab.com/farosas/qemu: (28 commits)
  migration: Remove unused VMSTATE_ARRAY_TEST() macro
  tests/migration-tests: Cover postcopy failure on reconnect
  tests/migration-tests: Verify postcopy-recover-setup status
  tests/migration-tests: migration_event_wait()
  tests/migration-tests: Always enable migration events
  tests/migration-tests: Drop most WIN32 ifdefs for postcopy failure tests
  migration/docs: Update postcopy recover session for SETUP phase
  migration/postcopy: Add postcopy-recover-setup phase
  migration: Cleanup incoming migration setup state change
  migration: Use MigrationStatus instead of int
  migration: Rename thread debug names
  migration/multifd: Avoid the final FLUSH in complete()
  tests/qtest/migration: Add a test for mapped-ram with passing of fds
  migration: Add documentation for fdset with multifd + file
  monitor: fdset: Match against O_DIRECT
  tests/qtest/migration: Add tests for file migration with direct-io
  migration/multifd: Add direct-io support
  migration: Add direct-io parameter
  io: Stop using qemu_open_old in channel-file
  monitor: Report errors from monitor_fdset_dup_fd_add
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'migration/migration.c')
-rw-r--r-- migration/migration.c 121
1 file changed, 93 insertions, 28 deletions
diff --git a/migration/migration.c b/migration/migration.c
index e1b269624c..3dea06d577 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -155,6 +155,16 @@ static bool migration_needs_seekable_channel(void)
     return migrate_mapped_ram();
 }
 
+static bool migration_needs_extra_fds(void)
+{
+    /*
+     * When doing direct-io, multifd requires two different,
+     * non-duplicated file descriptors so we can use one of them for
+     * unaligned IO.
+     */
+    return migrate_multifd() && migrate_direct_io();
+}
+
 static bool transport_supports_seeking(MigrationAddress *addr)
 {
     if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) {
@@ -164,6 +174,12 @@ static bool transport_supports_seeking(MigrationAddress *addr)
     return false;
 }
 
+static bool transport_supports_extra_fds(MigrationAddress *addr)
+{
+    /* file: works because QEMU can open it multiple times */
+    return addr->transport == MIGRATION_ADDRESS_TYPE_FILE;
+}
+
 static bool
 migration_channels_and_transport_compatible(MigrationAddress *addr,
                                             Error **errp)
@@ -180,6 +196,13 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
         return false;
     }
 
+    if (migration_needs_extra_fds() &&
+        !transport_supports_extra_fds(addr)) {
+        error_setg(errp,
+                   "Migration requires a transport that allows for extra fds (e.g. file)");
+        return false;
+    }
+
     return true;
 }
 
@@ -390,7 +413,7 @@ void migration_incoming_state_destroy(void)
     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
 }
 
-static void migrate_generate_event(int new_state)
+static void migrate_generate_event(MigrationStatus new_state)
 {
     if (migrate_events()) {
         qapi_event_send_migration(new_state);
@@ -595,6 +618,29 @@ bool migrate_uri_parse(const char *uri, MigrationChannel **channel,
     return true;
 }
 
+static bool
+migration_incoming_state_setup(MigrationIncomingState *mis, Error **errp)
+{
+    MigrationStatus current = mis->state;
+
+    if (current == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+        /*
+         * Incoming postcopy migration will stay in PAUSED state even if
+         * reconnection happened.
+         */
+        return true;
+    }
+
+    if (current != MIGRATION_STATUS_NONE) {
+        error_setg(errp, "Illegal migration incoming state: %s",
+                   MigrationStatus_str(current));
+        return false;
+    }
+
+    migrate_set_state(&mis->state, current, MIGRATION_STATUS_SETUP);
+    return true;
+}
+
 static void qemu_start_incoming_migration(const char *uri, bool has_channels,
                                           MigrationChannelList *channels,
                                           Error **errp)
@@ -633,8 +679,9 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
         return;
     }
 
-    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
-                      MIGRATION_STATUS_SETUP);
+    if (!migration_incoming_state_setup(mis, errp)) {
+        return;
+    }
 
     if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
         SocketAddress *saddr = &addr->u.socket;
@@ -1070,6 +1117,7 @@ bool migration_is_setup_or_active(void)
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
+    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
     case MIGRATION_STATUS_POSTCOPY_RECOVER:
     case MIGRATION_STATUS_SETUP:
     case MIGRATION_STATUS_PRE_SWITCHOVER:
@@ -1092,6 +1140,7 @@ bool migration_is_running(void)
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
+    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
     case MIGRATION_STATUS_POSTCOPY_RECOVER:
     case MIGRATION_STATUS_SETUP:
     case MIGRATION_STATUS_PRE_SWITCHOVER:
@@ -1229,6 +1278,7 @@ static void fill_source_migration_info(MigrationInfo *info)
     case MIGRATION_STATUS_PRE_SWITCHOVER:
     case MIGRATION_STATUS_DEVICE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
+    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
     case MIGRATION_STATUS_POSTCOPY_RECOVER:
         /* TODO add some postcopy stats */
         populate_time_info(info, s);
@@ -1273,8 +1323,6 @@ static void fill_destination_migration_info(MigrationInfo *info)
     }
 
     switch (mis->state) {
-    case MIGRATION_STATUS_NONE:
-        return;
     case MIGRATION_STATUS_SETUP:
     case MIGRATION_STATUS_CANCELLING:
     case MIGRATION_STATUS_CANCELLED:
@@ -1290,6 +1338,8 @@ static void fill_destination_migration_info(MigrationInfo *info)
         info->has_status = true;
         fill_destination_postcopy_migration_info(info);
         break;
+    default:
+        return;
     }
     info->status = mis->state;
 
@@ -1337,7 +1387,8 @@ void qmp_migrate_start_postcopy(Error **errp)
 
 /* shared migration helpers */
 
-void migrate_set_state(int *state, int old_state, int new_state)
+void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
+                       MigrationStatus new_state)
 {
     assert(new_state < MIGRATION_STATUS__MAX);
     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
@@ -1434,9 +1485,30 @@ static void migrate_error_free(MigrationState *s)
 
 static void migrate_fd_error(MigrationState *s, const Error *error)
 {
+    MigrationStatus current = s->state;
+    MigrationStatus next;
+
     assert(s->to_dst_file == NULL);
-    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
-                      MIGRATION_STATUS_FAILED);
+
+    switch (current) {
+    case MIGRATION_STATUS_SETUP:
+        next = MIGRATION_STATUS_FAILED;
+        break;
+    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
+        /* Never fail a postcopy migration; switch back to PAUSED instead */
+        next = MIGRATION_STATUS_POSTCOPY_PAUSED;
+        break;
+    default:
+        /*
+         * This really shouldn't happen. Just be careful to not crash a VM
+         * just for this.  Instead, dump something.
+         */
+        error_report("%s: Illegal migration status (%s) detected",
+                     __func__, MigrationStatus_str(current));
+        return;
+    }
+
+    migrate_set_state(&s->state, current, next);
     migrate_set_error(s, error);
 }
 
@@ -1537,6 +1609,7 @@ bool migration_in_postcopy(void)
     switch (s->state) {
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_PAUSED:
+    case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
     case MIGRATION_STATUS_POSTCOPY_RECOVER:
         return true;
     default:
@@ -1544,7 +1617,7 @@ bool migration_in_postcopy(void)
     }
 }
 
-bool migration_postcopy_is_alive(int state)
+bool migration_postcopy_is_alive(MigrationStatus state)
 {
     switch (state) {
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
@@ -1589,20 +1662,9 @@ bool migration_is_idle(void)
     case MIGRATION_STATUS_COMPLETED:
     case MIGRATION_STATUS_FAILED:
         return true;
-    case MIGRATION_STATUS_SETUP:
-    case MIGRATION_STATUS_CANCELLING:
-    case MIGRATION_STATUS_ACTIVE:
-    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
-    case MIGRATION_STATUS_COLO:
-    case MIGRATION_STATUS_PRE_SWITCHOVER:
-    case MIGRATION_STATUS_DEVICE:
-    case MIGRATION_STATUS_WAIT_UNPLUG:
+    default:
         return false;
-    case MIGRATION_STATUS__MAX:
-        g_assert_not_reached();
     }
-
-    return false;
 }
 
 bool migration_is_active(void)
@@ -1935,6 +1997,9 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
             return false;
         }
 
+        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
+                          MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
+
         /* This is a resume, skip init status */
         return true;
     }
@@ -2408,7 +2473,7 @@ static int open_return_path_on_source(MigrationState *ms)
 
     trace_open_return_path_on_source();
 
-    qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+    qemu_thread_create(&ms->rp_state.rp_thread, "mig/src/rp-thr",
                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
     ms->rp_state.rp_thread_created = true;
 
@@ -2967,9 +3032,9 @@ static MigThrError postcopy_pause(MigrationState *s)
          * We wait until things fixed up. Then someone will setup the
          * status back for us.
          */
-        while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+        do {
             qemu_sem_wait(&s->postcopy_pause_sem);
-        }
+        } while (postcopy_is_paused(s->state));
 
         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
             /* Woken up by a recover procedure. Give it a shot */
@@ -3665,7 +3730,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
 {
     Error *local_err = NULL;
     uint64_t rate_limit;
-    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
+    bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
     int ret;
 
     /*
@@ -3732,7 +3797,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
 
     if (resume) {
         /* Wakeup the main migration thread to do the recovery */
-        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
+        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP,
                           MIGRATION_STATUS_POSTCOPY_RECOVER);
         qemu_sem_post(&s->postcopy_pause_sem);
         return;
@@ -3747,10 +3812,10 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
     }
 
     if (migrate_background_snapshot()) {
-        qemu_thread_create(&s->thread, "bg_snapshot",
+        qemu_thread_create(&s->thread, "mig/snapshot",
                 bg_migration_thread, s, QEMU_THREAD_JOINABLE);
     } else {
-        qemu_thread_create(&s->thread, "live_migration",
+        qemu_thread_create(&s->thread, "mig/src/main",
                 migration_thread, s, QEMU_THREAD_JOINABLE);
     }
     s->migration_thread_running = true;