summary refs log tree commit diff stats
path: root/migration/savevm.c
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2023-11-03 09:57:32 +0800
committerStefan Hajnoczi <stefanha@redhat.com>2023-11-03 09:57:32 +0800
commit75b7b25d44a64411ea0ae792d5ebad8ddf22527e (patch)
tree26530d73d594c13f116dd88135245586696aafb5 /migration/savevm.c
parent4a6a6cb60de80e9e90d9915abfec8d3d9fcc157c (diff)
parent8e3766eefbb4036cbc280c1f1a0d28537929f7fb (diff)
downloadfocaccia-qemu-75b7b25d44a64411ea0ae792d5ebad8ddf22527e.tar.gz
focaccia-qemu-75b7b25d44a64411ea0ae792d5ebad8ddf22527e.zip
Merge tag 'migration-20231102-pull-request' of https://gitlab.com/juan.quintela/qemu into staging
Migration Pull request (20231102)

Hi

In this pull request:

- migration reboot mode (steve)
  * I disabled the test because our CI don't like programs using so
    much shared memory.  Searching for a fix.
- test for postcopy recover (fabiano)
- MigrateAddress QAPI (het)
- better return path error handling (peter)
- traces for downtime (peter)
- vmstate_register() check for duplicates (juan)
  thomas find better solutions for s390x and ipmi.
  now also works on s390x

Please, apply.

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmVDipMACgkQ9IfvGFhy
# 1yNYnQ/9E5Cywsoqljqa/9FiKBSII2qMrmkfu6JLKqePnsh5pFZiukbudYRuJCCe
# ZTDEmD0NmKRJbDx2xRU1qx/e6gKJy+gz37KP89Buuh/WwZHPboPYtxQpGvCSiH26
# J3i+1+TgaqmkLzcO35wa8tp6gneQclWeAwKgMvdb4cm2pJEhgWRKI62ccyLzxeve
# UCzFQn60t55ETyVZGnRD4YwdTQvGKH+DPlyTuJOLR3DePuvZd8EdH+ypvB4RLAy7
# 3+CuQOxmF5LRXPbpJuAeOsudbmhhHzrO/yL7ZmsiKQTthsJv+SzC1bO94jhQrawZ
# Q7GCii5KpGq0KnRTRKZRGk6XKwxcYRduXMX3R5tXuVmDmCZsjhXzziU8yEdftph8
# 5TJdk1o0Gb043EFu81mrsQYS+9yJqe6sy6m3PTJaec54cAty5ln+c17WOvpAOaSV
# +1phe05ftuVPmQ3KWhbIR/tCmavNLwEZxpVIfyaKJx04bFbtQ9gRpRyURORX4KXc
# s4WXvNirQEohxYBnP4TPvA09xBTW3V08pk/wRDwt0YDXnLiqCltOuxD8r05K8K4B
# MkCLcWj0g7he2tBkF60oz1KSIE0oTB81um9AzLIv5F2YSYLaJM5BIcoC437MR2f4
# MOR7drR1fP5GsRu/SeU5BWvhVq3IvdOxR7G2MLNRJJvl7ZtGXDc=
# =uaqL
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 02 Nov 2023 19:40:03 HKT
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* tag 'migration-20231102-pull-request' of https://gitlab.com/juan.quintela/qemu: (40 commits)
  migration: modify test_multifd_tcp_none() to use new QAPI syntax.
  migration: Implement MigrateChannelList to hmp migration flow.
  migration: Implement MigrateChannelList to qmp migration flow.
  migration: modify migration_channels_and_uri_compatible() for new QAPI syntax
  migration: New migrate and migrate-incoming argument 'channels'
  migration: Convert the file backend to the new QAPI syntax
  migration: convert exec backend to accept MigrateAddress.
  migration: convert rdma backend to accept MigrateAddress
  migration: convert socket backend to accept MigrateAddress
  migration: convert migration 'uri' into 'MigrateAddress'
  migration: New QAPI type 'MigrateAddress'
  migration: Change ram_dirty_bitmap_reload() retval to bool
  tests/migration-test: Add a test for postcopy hangs during RECOVER
  migration: Allow network to fail even during recovery
  migration: Refactor error handling in source return path
  tests/qtest: migration: add reboot mode test
  cpr: reboot mode
  cpr: relax vhost migration blockers
  cpr: relax blockdev migration blockers
  migration: per-mode blockers
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'migration/savevm.c')
-rw-r--r--migration/savevm.c95
1 files changed, 86 insertions, 9 deletions
diff --git a/migration/savevm.c b/migration/savevm.c
index c7835e9c73..bc98c2ea6f 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -237,6 +237,8 @@ static SaveState savevm_state = {
     .global_section_id = 0,
 };
 
+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id);
+
 static bool should_validate_capability(int capability)
 {
     assert(capability >= 0 && capability < MIGRATION_CAPABILITY__MAX);
@@ -716,6 +718,18 @@ static void savevm_state_handler_insert(SaveStateEntry *nse)
 
     assert(priority <= MIG_PRI_MAX);
 
+    /*
+     * This should never happen otherwise migration will probably fail
+     * silently somewhere because we can be wrongly applying one
+     * object properties upon another one.  Bail out ASAP.
+     */
+    if (find_se(nse->idstr, nse->instance_id)) {
+        error_report("%s: Detected duplicate SaveStateEntry: "
+                     "id=%s, instance_id=0x%"PRIx32, __func__,
+                     nse->idstr, nse->instance_id);
+        exit(EXIT_FAILURE);
+    }
+
     for (i = priority - 1; i >= 0; i--) {
         se = savevm_state.handler_pri_head[i];
         if (se != NULL) {
@@ -846,6 +860,24 @@ static void vmstate_check(const VMStateDescription *vmsd)
     }
 }
 
+/*
+ * See comment in hw/intc/xics.c:icp_realize()
+ *
+ * This function can be removed when
+ * pre_2_10_vmstate_register_dummy_icp() is removed.
+ */
+int vmstate_replace_hack_for_ppc(VMStateIf *obj, int instance_id,
+                                 const VMStateDescription *vmsd,
+                                 void *opaque)
+{
+    SaveStateEntry *se = find_se(vmsd->name, instance_id);
+
+    if (se) {
+        savevm_state_handler_remove(se);
+    }
+    return vmstate_register(obj, instance_id, vmsd, opaque);
+}
+
 int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
                                    const VMStateDescription *vmsd,
                                    void *opaque, int alias_id,
@@ -1459,6 +1491,7 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
 static
 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
 {
+    int64_t start_ts_each, end_ts_each;
     SaveStateEntry *se;
     int ret;
 
@@ -1475,6 +1508,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
                 continue;
             }
         }
+
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
         trace_savevm_section_start(se->idstr, se->section_id);
 
         save_section_header(f, se, QEMU_VM_SECTION_END);
@@ -1486,8 +1521,13 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
             qemu_file_set_error(f, ret);
             return -1;
         }
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
     }
 
+    trace_vmstate_downtime_checkpoint("src-iterable-saved");
+
     return 0;
 }
 
@@ -1496,6 +1536,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                     bool inactivate_disks)
 {
     MigrationState *ms = migrate_get_current();
+    int64_t start_ts_each, end_ts_each;
     JSONWriter *vmdesc = ms->vmdesc;
     int vmdesc_len;
     SaveStateEntry *se;
@@ -1507,11 +1548,17 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
             continue;
         }
 
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+
         ret = vmstate_save(f, se, vmdesc);
         if (ret) {
             qemu_file_set_error(f, ret);
             return ret;
         }
+
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
     }
 
     if (inactivate_disks) {
@@ -1547,6 +1594,8 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
     json_writer_free(vmdesc);
     ms->vmdesc = NULL;
 
+    trace_vmstate_downtime_checkpoint("src-non-iterable-saved");
+
     return 0;
 }
 
@@ -2088,18 +2137,18 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
     Error *local_err = NULL;
     MigrationIncomingState *mis = opaque;
 
-    trace_loadvm_postcopy_handle_run_bh("enter");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-enter");
 
     /* TODO we should move all of this lot into postcopy_ram.c or a shared code
      * in migration.c
      */
     cpu_synchronize_all_post_init();
 
-    trace_loadvm_postcopy_handle_run_bh("after cpu sync");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cpu-synced");
 
     qemu_announce_self(&mis->announce_timer, migrate_announce_params());
 
-    trace_loadvm_postcopy_handle_run_bh("after announce");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-announced");
 
     /* Make sure all file formats throw away their mutable metadata.
      * If we get an error here, just don't restart the VM yet. */
@@ -2110,7 +2159,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
         autostart = false;
     }
 
-    trace_loadvm_postcopy_handle_run_bh("after invalidate cache");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-cache-invalidated");
 
     dirty_bitmap_mig_before_vm_start();
 
@@ -2124,7 +2173,7 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
 
     qemu_bh_delete(mis->bh);
 
-    trace_loadvm_postcopy_handle_run_bh("return");
+    trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
 }
 
 /* After all discards we can start running and asking for pages */
@@ -2505,9 +2554,12 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
 }
 
 static int
-qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
+                               uint8_t type)
 {
+    bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
     uint32_t instance_id, version_id, section_id;
+    int64_t start_ts, end_ts;
     SaveStateEntry *se;
     char idstr[256];
     int ret;
@@ -2556,12 +2608,23 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
         return -EINVAL;
     }
 
+    if (trace_downtime) {
+        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+    }
+
     ret = vmstate_load(f, se);
     if (ret < 0) {
         error_report("error while loading state for instance 0x%"PRIx32" of"
                      " device '%s'", instance_id, idstr);
         return ret;
     }
+
+    if (trace_downtime) {
+        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_load("non-iterable", se->idstr,
+                                    se->instance_id, end_ts - start_ts);
+    }
+
     if (!check_section_footer(f, se)) {
         return -EINVAL;
     }
@@ -2570,8 +2633,11 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
 }
 
 static int
-qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
+                             uint8_t type)
 {
+    bool trace_downtime = (type == QEMU_VM_SECTION_END);
+    int64_t start_ts, end_ts;
     uint32_t section_id;
     SaveStateEntry *se;
     int ret;
@@ -2596,12 +2662,23 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
         return -EINVAL;
     }
 
+    if (trace_downtime) {
+        start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+    }
+
     ret = vmstate_load(f, se);
     if (ret < 0) {
         error_report("error while loading state section id %d(%s)",
                      section_id, se->idstr);
         return ret;
     }
+
+    if (trace_downtime) {
+        end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_load("iterable", se->idstr,
+                                    se->instance_id, end_ts - start_ts);
+    }
+
     if (!check_section_footer(f, se)) {
         return -EINVAL;
     }
@@ -2790,14 +2867,14 @@ retry:
         switch (section_type) {
         case QEMU_VM_SECTION_START:
         case QEMU_VM_SECTION_FULL:
-            ret = qemu_loadvm_section_start_full(f, mis);
+            ret = qemu_loadvm_section_start_full(f, mis, section_type);
             if (ret < 0) {
                 goto out;
             }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
-            ret = qemu_loadvm_section_part_end(f, mis);
+            ret = qemu_loadvm_section_part_end(f, mis, section_type);
             if (ret < 0) {
                 goto out;
             }