summary refs log tree commit diff stats
path: root/gitlab/issues/target_missing/host_missing/accel_TCG/1565.toml
diff options
context:
space:
mode:
Diffstat (limited to 'gitlab/issues/target_missing/host_missing/accel_TCG/1565.toml')
-rw-r--r--gitlab/issues/target_missing/host_missing/accel_TCG/1565.toml44
1 files changed, 44 insertions, 0 deletions
diff --git a/gitlab/issues/target_missing/host_missing/accel_TCG/1565.toml b/gitlab/issues/target_missing/host_missing/accel_TCG/1565.toml
new file mode 100644
index 000000000..99f3a3ae8
--- /dev/null
+++ b/gitlab/issues/target_missing/host_missing/accel_TCG/1565.toml
@@ -0,0 +1,44 @@
+id = 1565
+title = "s390x TCG migration failure"
+state = "closed"
+created_at = "2023-03-28T22:18:08.770Z"
+closed_at = "2024-03-26T13:35:28.482Z"
+labels = ["Migration", "accel: TCG"]
+url = "https://gitlab.com/qemu-project/qemu/-/issues/1565"
+host-os = "Fedora 37"
+host-arch = "s390x"
+qemu-version = "v8.0.0-rc1"
+guest-os = "kvm-unit-test (s390x migration tests)"
+guest-arch = "s390x"
+description = """We're seeing failures running s390x migration kvm-unit-tests tests with TCG.
+
+Some initial findings:
+
+What seems to be happening is that after migration a control block header accessed by the test code is all zeros which causes an unexpected exception.
+
+I did a bisection which points to c8df4a7aef ("migration: Split save_live_pending() into state_pending_*") as the culprit.
+The migration issue persists after applying the fix e264705012 ("migration: I messed state_pending_exact/estimate") on top of c8df4a7aef.
+
+Applying
+
+```
+diff --git a/migration/ram.c b/migration/ram.c
+index 56ff9cd29d..2dc546cf28 100644
+--- a/migration/ram.c
++++ b/migration/ram.c
+@@ -3437,7 +3437,7 @@ static void ram_state_pending_exact(void *opaque, uint64_t max_size,
+ 
+     uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
+ 
+-    if (!migration_in_postcopy()) {
++    if (!migration_in_postcopy() && remaining_size < max_size) {
+         qemu_mutex_lock_iothread();
+         WITH_RCU_READ_LOCK_GUARD() {
+             migration_bitmap_sync_precopy(rs);
+```
+on top fixes or hides the issue. (The comparison was removed by c8df4a7aef.)
+
+I arrived at this by experimentation, I haven't looked into why this makes a difference."""
+reproduce = """1. Run ACCEL=tcg ./run_tests.sh migration-skey-sequential with current QEMU master
+2. Repeat until the test fails (doesn't happen every time, but still easy to reproduce)"""
+additional = "n/a"