summary refs log tree commit diff stats
path: root/migration/savevm.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-07-28 14:38:17 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-07-28 14:38:17 +0100
commit264991512193ee50e27d43e66f832d5041cf3b28 (patch)
tree1576995ac48671469cb568d9c6ecae70fa3a8f6f /migration/savevm.c
parent1b242c3b1ec7c6011901b4f3b4b0876e31746afb (diff)
parent37931e006f05cb768b78dcc47453b13f76ea43c5 (diff)
downloadfocaccia-qemu-264991512193ee50e27d43e66f832d5041cf3b28.tar.gz
focaccia-qemu-264991512193ee50e27d43e66f832d5041cf3b28.zip
Merge remote-tracking branch 'remotes/ericb/tags/pull-bitmaps-2020-07-27' into staging
bitmaps patches for 2020-07-27

- Improve handling of various post-copy bitmap migration scenarios. A lost
bitmap should merely mean that the next backup must be full rather than
incremental, rather than abruptly breaking the entire guest migration.
- Associated iotest improvements

# gpg: Signature made Mon 27 Jul 2020 21:46:17 BST
# gpg:                using RSA key 71C2CC22B1C4602927D2F3AAA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-bitmaps-2020-07-27: (24 commits)
  migration: Fix typos in bitmap migration comments
  iotests: Adjust which migration tests are quick
  qemu-iotests/199: add source-killed case to bitmaps postcopy
  qemu-iotests/199: add early shutdown case to bitmaps postcopy
  qemu-iotests/199: check persistent bitmaps
  qemu-iotests/199: prepare for new test-cases addition
  migration/savevm: don't worry if bitmap migration postcopy failed
  migration/block-dirty-bitmap: cancel migration on shutdown
  migration/block-dirty-bitmap: relax error handling in incoming part
  migration/block-dirty-bitmap: keep bitmap state for all bitmaps
  migration/block-dirty-bitmap: simplify dirty_bitmap_load_complete
  migration/block-dirty-bitmap: rename finish_lock to just lock
  migration/block-dirty-bitmap: refactor state global variables
  migration/block-dirty-bitmap: move mutex init to dirty_bitmap_mig_init
  migration/block-dirty-bitmap: rename dirty_bitmap_mig_cleanup
  migration/block-dirty-bitmap: rename state structure types
  migration/block-dirty-bitmap: fix dirty_bitmap_mig_before_vm_start
  qemu-iotests/199: increase postcopy period
  qemu-iotests/199: change discard patterns
  qemu-iotests/199: improve performance: set bitmap by discard
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'migration/savevm.c')
-rw-r--r--migration/savevm.c37
1 files changed, 32 insertions, 5 deletions
diff --git a/migration/savevm.c b/migration/savevm.c
index 45c9dd9d8a..a843d202b5 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1813,6 +1813,9 @@ static void *postcopy_ram_listen_thread(void *opaque)
     MigrationIncomingState *mis = migration_incoming_get_current();
     QEMUFile *f = mis->from_src_file;
     int load_res;
+    MigrationState *migr = migrate_get_current();
+
+    object_ref(OBJECT(migr));
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                                    MIGRATION_STATUS_POSTCOPY_ACTIVE);
@@ -1839,11 +1842,24 @@ static void *postcopy_ram_listen_thread(void *opaque)
 
     trace_postcopy_ram_listen_thread_exit();
     if (load_res < 0) {
-        error_report("%s: loadvm failed: %d", __func__, load_res);
         qemu_file_set_error(f, load_res);
-        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
-                                       MIGRATION_STATUS_FAILED);
-    } else {
+        dirty_bitmap_mig_cancel_incoming();
+        if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
+            !migrate_postcopy_ram() && migrate_dirty_bitmaps())
+        {
+            error_report("%s: loadvm failed during postcopy: %d. All states "
+                         "are migrated except dirty bitmaps. Some dirty "
+                         "bitmaps may be lost, and present migrated dirty "
+                         "bitmaps are correctly migrated and valid.",
+                         __func__, load_res);
+            load_res = 0; /* prevent further exit() */
+        } else {
+            error_report("%s: loadvm failed: %d", __func__, load_res);
+            migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+                                           MIGRATION_STATUS_FAILED);
+        }
+    }
+    if (load_res >= 0) {
         /*
          * This looks good, but it's possible that the device loading in the
          * main thread hasn't finished yet, and so we might not be in 'RUN'
@@ -1879,6 +1895,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
     mis->have_listen_thread = false;
     postcopy_state_set(POSTCOPY_INCOMING_END);
 
+    object_unref(OBJECT(migr));
+
     return NULL;
 }
 
@@ -2437,6 +2455,8 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
 {
     trace_postcopy_pause_incoming();
 
+    assert(migrate_postcopy_ram());
+
     /* Clear the triggered bit to allow one recovery */
     mis->postcopy_recover_triggered = false;
 
@@ -2521,15 +2541,22 @@ out:
     if (ret < 0) {
         qemu_file_set_error(f, ret);
 
+        /* Cancel bitmaps incoming regardless of recovery */
+        dirty_bitmap_mig_cancel_incoming();
+
         /*
          * If we are during an active postcopy, then we pause instead
          * of bail out to at least keep the VM's dirty data.  Note
          * that POSTCOPY_INCOMING_LISTENING stage is still not enough,
          * during which we're still receiving device states and we
          * still haven't yet started the VM on destination.
+         *
+         * Only RAM postcopy supports recovery. Still, if RAM postcopy is
+         * enabled, canceled bitmaps postcopy will not affect RAM postcopy
+         * recovering.
          */
         if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
-            postcopy_pause_incoming(mis)) {
+            migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
             /* Reset f to point to the newly created channel */
             f = mis->from_src_file;
             goto retry;