diff options
Diffstat (limited to 'gitlab/issues/target_missing/host_missing/accel_missing/1052.toml')
| -rw-r--r-- | gitlab/issues/target_missing/host_missing/accel_missing/1052.toml | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/gitlab/issues/target_missing/host_missing/accel_missing/1052.toml b/gitlab/issues/target_missing/host_missing/accel_missing/1052.toml new file mode 100644 index 00000000..71541f98 --- /dev/null +++ b/gitlab/issues/target_missing/host_missing/accel_missing/1052.toml @@ -0,0 +1,87 @@ +id = 1052 +title = "QEMU monitor hangs after \"stop\" QMP command called in postcopy-paused migration state" +state = "opened" +created_at = "2022-06-01T12:30:31.562Z" +closed_at = "n/a" +labels = ["Migration"] +url = "https://gitlab.com/qemu-project/qemu/-/issues/1052" +host-os = "Fedora 35" +host-arch = "x86_64" +qemu-version = "7.0.0" +guest-os = "RHEL 7.8" +guest-arch = "x86" +description = """QEMU monitor hangs when I try to pause virtual CPUs using "stop" QMP command +on the destination host once migration enters postcopy-paused (after it was +paused using "migrate-pause" QMP command on the source host). QEMU just does +not send any reply to the "stop" command.""" +reproduce = """1. start migration +2. wait for the first iteration to finish +3. switch to post-copy using "migrate-start-postcopy" +3. break migration with "migrate-pause" +4. send "stop" to the destination monitor""" +additional = """Unfortunately I haven't been able to get a stack trace as gdb just hangs when +I try to attach it to QEMU after step 4. I can see threads getting SIGUSR1 +after the "stop" command, but I cannot get to gdb prompt afterwards: + +``` +(gdb) c +Continuing. +[New Thread 0x7f41ec9be640 (LWP 1112)] +[New Thread 0x7f41d7fff640 (LWP 1113)] +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +Thread 5 "CPU 1/KVM" received signal SIGUSR1, User defined signal 1. +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +Thread 5 "CPU 1/KVM" received signal SIGUSR1, User defined signal 1. +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +Thread 5 "CPU 1/KVM" received signal SIGUSR1, User defined signal 1. +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +Thread 5 "CPU 1/KVM" received signal SIGUSR1, User defined signal 1. +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +Thread 5 "CPU 1/KVM" received signal SIGUSR1, User defined signal 1. +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +Thread 4 "CPU 0/KVM" received signal SIGUSR1, User defined signal 1. +``` + +I was able to attach strace to it though (in case it is at least a bit +useful). The first line corresponds to the final '}' of the +{"execute":"stop","id":"libvirt-413"} QMP comamnd: + +``` +[pid 72970] recvmsg(20, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="}", iov_len=1}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 1 +[pid 72970] write(4, "\\1\\0\\0\\0\\0\\0\\0\\0", 8) = 8 +[pid 72949] <... ppoll resumed>) = 1 ([{fd=4, revents=POLLIN}], left {tv_sec=0, tv_nsec=513181335}) +[pid 72970] write(19, "\\1\\0\\0\\0\\0\\0\\0\\0", 8 <unfinished ...> +[pid 72949] read(4, <unfinished ...> +[pid 72970] <... write resumed>) = 8 +[pid 72949] <... read resumed>"\\1\\0\\0\\0\\0\\0\\0\\0", 512) = 8 +[pid 72970] write(19, "\\1\\0\\0\\0\\0\\0\\0\\0", 8 <unfinished ...> +[pid 72949] ppoll([{fd=4, events=POLLIN}, {fd=5, events=POLLIN}, {fd=8, events=POLLIN}, {fd=9, events=POLLIN}, {fd=11, events=POLLIN}, {fd=12, events=POLLIN}, {fd=23, events=POLLIN}, {fd=24, events=POLLIN}, {fd=29, events=POLLIN}, {fd=30, events=POLLIN}, {fd=31, events=POLLIN}, {fd=32, events=POLLIN}, {fd=33, events=POLLIN}, {fd=34, events=POLLIN}, {fd=38, events=POLLIN}, {fd=40, events=POLLIN}, {fd=41, events=POLLIN}, {fd=42, events=POLLIN}, {fd=43, events=POLLIN}, {fd=44, events=POLLIN}, {fd=45, events=POLLIN}, {fd=46, events=POLLIN}, {fd=47, events=POLLIN}, {fd=48, events=POLLIN}, {fd=49, events=POLLIN}, {fd=50, events=POLLIN}, {fd=51, events=POLLIN}, {fd=52, events=POLLIN}, {fd=53, events=POLLIN}, {fd=54, events=POLLIN}, {fd=55, events=POLLIN}, {fd=56, events=POLLIN}, ...], 74, {tv_sec=0, tv_nsec=0}, NULL, 8 <unfinished ...> +[pid 72970] <... write resumed>) = 8 +[pid 72949] <... ppoll resumed>) = 0 (Timeout) +[pid 72970] write(19, "\\1\\0\\0\\0\\0\\0\\0\\0", 8 <unfinished ...> +[pid 72949] write(8, "\\1\\0\\0\\0\\0\\0\\0\\0", 8 <unfinished ...> +[pid 72970] <... write resumed>) = 8 +[pid 72949] <... write resumed>) = 8 +[pid 72970] write(19, "\\1\\0\\0\\0\\0\\0\\0\\0", 8 <unfinished ...> +[pid 72949] ppoll([{fd=4, events=POLLIN}, {fd=5, events=POLLIN}, {fd=8, events=POLLIN}, {fd=9, events=POLLIN}, {fd=11, events=POLLIN}, {fd=12, events=POLLIN}, {fd=23, events=POLLIN}, {fd=24, events=POLLIN}, {fd=29, events=POLLIN}, {fd=30, events=POLLIN}, {fd=31, events=POLLIN}, {fd=32, events=POLLIN}, {fd=33, events=POLLIN}, {fd=34, events=POLLIN}, {fd=38, events=POLLIN}, {fd=40, events=POLLIN}, {fd=41, events=POLLIN}, {fd=42, events=POLLIN}, {fd=43, events=POLLIN}, {fd=44, events=POLLIN}, {fd=45, events=POLLIN}, {fd=46, events=POLLIN}, {fd=47, events=POLLIN}, {fd=48, events=POLLIN}, {fd=49, events=POLLIN}, {fd=50, events=POLLIN}, {fd=51, events=POLLIN}, {fd=52, events=POLLIN}, {fd=53, events=POLLIN}, {fd=54, events=POLLIN}, {fd=55, events=POLLIN}, {fd=56, events=POLLIN}, ...], 74, {tv_sec=0, tv_nsec=0}, NULL, 8 <unfinished ...> +[pid 72970] <... write resumed>) = 8 +[pid 72949] <... ppoll resumed>) = 1 ([{fd=8, revents=POLLIN}], left {tv_sec=0, tv_nsec=0}) +[pid 72970] poll([{fd=18, events=POLLIN}, {fd=19, events=POLLIN}, {fd=20, events=0}], 3, -1 <unfinished ...> +[pid 72949] rt_sigprocmask(SIG_BLOCK, ~[], <unfinished ...> +[pid 72970] <... poll resumed>) = 1 ([{fd=19, revents=POLLIN}]) +[pid 72949] <... rt_sigprocmask resumed>[BUS USR1 ALRM IO], 8) = 0 +[pid 72970] read(19, <unfinished ...> +[pid 72949] getpid() = 72949 +[pid 72970] <... read resumed>"\\5\\0\\0\\0\\0\\0\\0\\0", 16) = 8 +[pid 72949] tgkill(72949, 72971, SIGUSR1 <unfinished ...> +[pid 72970] poll([{fd=18, events=POLLIN}, {fd=19, events=POLLIN}, {fd=20, events=0}], 3, -1 <unfinished ...> +[pid 72949] <... tgkill resumed>) = 0 +[pid 72949] rt_sigprocmask(SIG_SETMASK, [BUS USR1 ALRM IO], NULL, 8) = 0 +[pid 72949] rt_sigprocmask(SIG_BLOCK, ~[], [BUS USR1 ALRM IO], 8) = 0 +[pid 72949] getpid() = 72949 +[pid 72949] tgkill(72949, 72972, SIGUSR1) = 0 +[pid 72949] rt_sigprocmask(SIG_SETMASK, [BUS USR1 ALRM IO], NULL, 8) = 0 +[pid 72949] futex(0x5606f6cb73a8, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, NULL, FUTEX_BITSET_MATCH_ANY +``` + +And that's it, the last futex never returns.""" |