diff options
Diffstat (limited to 'results/classifier/118/none/1807052')
| -rw-r--r-- | results/classifier/118/none/1807052 | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/results/classifier/118/none/1807052 b/results/classifier/118/none/1807052 new file mode 100644 index 000000000..557ae452a --- /dev/null +++ b/results/classifier/118/none/1807052 @@ -0,0 +1,392 @@ +hypervisor: 0.767 +peripherals: 0.713 +KVM: 0.687 +VMM: 0.643 +user-level: 0.609 +vnc: 0.588 +i386: 0.563 +ppc: 0.560 +mistranslation: 0.556 +virtual: 0.553 +permissions: 0.550 +register: 0.529 +risc-v: 0.506 +x86: 0.504 +debug: 0.490 +performance: 0.471 +arm: 0.466 +TCG: 0.465 +files: 0.461 +boot: 0.446 +graphic: 0.444 +network: 0.440 +architecture: 0.437 +PID: 0.428 +device: 0.427 +socket: 0.412 +assembly: 0.406 +kernel: 0.399 +semantic: 0.397 + +Qemu hangs during migration + +Source server: linux 4.19.5 qemu-3.0.0 from source, libvirt 4.9 +Dest server: linux 4.18.19 qemu-3.0.0 from source, libvirt 4.9 + +When this VM is running on source server: + +/usr/bin/qemu-system-x86_64 -name guest=testvm,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-13-testvm/master-key.aes -machine pc-q35-3.0,accel=kvm,usb=off,dump-guest-core=off -cpu Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer,hv_reset,hv_vendor_id=KVM Hv -m 4096 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 -object iothread,id=iothread1 -uuid 3b00b788-ee91-4e45-80a6-c7319da71225 -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=23,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=localtime,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -boot strict=on -device pcie-root-port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2 -device pcie-root-port,port=0x11,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-pci-bridge,id=pci.3,bus=pci.1,addr=0x0 -device pcie-root-port,port=0x12,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x2 -device pcie-root-port,port=0x13,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x3 -device piix3-usb-uhci,id=usb,bus=pci.3,addr=0x1 -device virtio-scsi-pci,iothread=iothread1,id=scsi0,bus=pci.4,addr=0x0 -drive file=/dev/zvol/datastore/vm/testvm-vda,format=raw,if=none,id=drive-scsi0-0-0-0,cache=writeback,aio=threads -device scsi-hd,bus=scsi0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0-0-0-0,id=scsi0-0-0-0,bootindex=2,write-cache=on -drive if=none,id=drive-sata0-0-4,media=cdrom,readonly=on -device ide-cd,bus=ide.4,drive=drive-sata0-0-4,id=sata0-0-4,bootindex=1 -netdev tap,fd=25,id=hostnet0,vhost=on,vhostfd=26 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:a2:b7:a1,bus=pci.2,addr=0x0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 127.0.0.1:0 -device cirrus-vga,id=video0,bus=pcie.0,addr=0x1 -s -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on + +I try to migrate it and the disks to the other side: + +virsh migrate --live --undefinesource --persistent --verbose --copy-storage-all testvm qemu+ssh://wasvirt1/system + +We get to 99% then hang with both sides in the pause state. + +Source server is stuck here: +(gdb) bt full +#0 0x00007f327994f3c1 in ppoll () at /lib64/libc.so.6 +#1 0x000000000086167b in qemu_poll_ns (fds=<optimized out>, nfds=nfds@entry=1, timeout=<optimized out>) at util/qemu-timer.c:322 +#2 0x0000000000863302 in aio_poll (ctx=0x21044e0, blocking=blocking@entry=true) at util/aio-posix.c:629 + node = <optimized out> + i = <optimized out> + ret = 0 + progress = <optimized out> + timeout = <optimized out> + start = <optimized out> + __PRETTY_FUNCTION__ = "aio_poll" +#3 0x00000000007e0d52 in nbd_client_close (bs=0x2ba2400) at block/nbd-client.c:62 + waited_ = <optimized out> + wait_ = 0x2ba563c + ctx_ = 0x2109bb0 + bs_ = 0x2ba2400 + client = 0x31287e0 + client = <optimized out> + request = {handle = 0, from = 0, len = 0, flags = 0, type = 2} +#4 0x00000000007e0d52 in nbd_client_close (bs=0x2ba2400) at block/nbd-client.c:965 + client = <optimized out> + request = {handle = 0, from = 0, len = 0, flags = 0, type = 2} +#5 0x00000000007de5ca in nbd_close (bs=<optimized out>) at block/nbd.c:491 + s = 0x31287e0 +#6 0x00000000007823d6 in bdrv_unref (bs=0x2ba2400) at block.c:3352 + ban = <optimized out> + ban_next = <optimized out> + child = <optimized out> + next = <optimized out> +#7 0x00000000007823d6 in bdrv_unref (bs=0x2ba2400) at block.c:3560 +#8 0x00000000007823d6 in bdrv_unref (bs=0x2ba2400) at block.c:4616 +#9 0x0000000000782403 in bdrv_unref (bs=0x2af96f0) at block.c:3359 + ban = <optimized out> + ban_next = <optimized out> + child = <optimized out> + next = <optimized out> +#10 0x0000000000782403 in bdrv_unref (bs=0x2af96f0) at block.c:3560 +#11 0x0000000000782403 in bdrv_unref (bs=0x2af96f0) at block.c:4616 +#12 0x0000000000785784 in block_job_remove_all_bdrv (job=job@entry=0x2f32570) at blockjob.c:200 + c = 0x23bac30 + l = 0x20dd330 = {0x23bac30, 0x2b89410} +#13 0x00000000007ceb5f in mirror_exit (job=0x2f32570, opaque=0x7f326407a350) at block/mirror.c:700 + s = 0x2f32570 + bjob = 0x2f32570 + data = 0x7f326407a350 + bs_opaque = 0x30d5600 + replace_aio_context = <optimized out> + src = 0x2131080 + target_bs = 0x2af96f0 + mirror_top_bs = 0x210eb70 + local_err = 0x0 +#14 0x0000000000786452 in job_defer_to_main_loop_bh (opaque=0x7f32640786a0) at job.c:973 + data = 0x7f32640786a0 + job = <optimized out> + aio_context = 0x2109bb0 +#15 0x000000000085fd3f in aio_bh_poll (ctx=ctx@entry=0x21044e0) at util/async.c:118 +---Type <return> to continue, or q <return> to quit--- + bh = <optimized out> + bhp = <optimized out> + next = 0x2ea86e0 + ret = 1 + deleted = false +#16 0x00000000008631b0 in aio_dispatch (ctx=0x21044e0) at util/aio-posix.c:436 +#17 0x000000000085fc1e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:261 + ctx = <optimized out> +#18 0x00007f327f17d797 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0 +#19 0x00000000008622ed in main_loop_wait () at util/main-loop.c:215 + context = 0x2104900 + pfds = <optimized out> + context = 0x2104900 + ret = 1 + ret = 1 + timeout = 4294967295 + timeout_ns = <optimized out> +#20 0x00000000008622ed in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:238 + context = 0x2104900 + ret = 1 + ret = 1 + timeout = 4294967295 + timeout_ns = <optimized out> +#21 0x00000000008622ed in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497 + ret = 1 + timeout = 4294967295 + timeout_ns = <optimized out> +#22 0x0000000000595dee in main_loop () at vl.c:1866 +#23 0x000000000041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644 + i = <optimized out> + snapshot = 0 + linux_boot = <optimized out> + initrd_filename = 0x0 + kernel_filename = <optimized out> + kernel_cmdline = <optimized out> + boot_order = 0x918f44 "cad" + boot_once = 0x0 + ds = <optimized out> + opts = <optimized out> + machine_opts = <optimized out> + icount_opts = <optimized out> + accel_opts = 0x0 + olist = <optimized out> + optind = 71 + optarg = 0x7ffdfc94df69 "timestamp=on" + loadvm = 0x0 + machine_class = 0x0 + cpu_model = 0x7ffdfc94d864 "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"... + vga_model = 0x0 + qtest_chrdev = 0x0 + qtest_log = 0x0 + pid_file = <optimized out> + incoming = 0x0 + userconfig = <optimized out> +---Type <return> to continue, or q <return> to quit--- + nographic = false + display_remote = <optimized out> + log_mask = <optimized out> + log_file = <optimized out> + trace_file = <optimized out> + maxram_size = 4294967296 + ram_slots = 0 + vmstate_dump_file = 0x0 + main_loop_err = 0x0 + err = 0x0 + list_data_dirs = false + dir = <optimized out> + dirs = <optimized out> + bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffdfc94c170} + __func__ = "main" + +Strace shows: +ppoll([{fd=9, events=POLLIN|POLLERR|POLLHUP}], 1, NULL, NULL, 8 + +Which points to this: + +ls -al /proc/2286/fd/9 +lrwx------ 1 root users 64 Dec 5 13:04 /proc/2286/fd/9 -> anon_inode:[eventfd] + +The dest side is stuck here: + +(gdb) bt full +#0 0x00007f21f070d3c1 in ppoll () at /lib64/libc.so.6 +#1 0x0000000000861659 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=2999926258) at util/qemu-timer.c:334 + ts = {tv_sec = 2, tv_nsec = 999926258} +Python Exception <class 'gdb.error'> That operation is not available on integers of more than 8 bytes.: +#2 0x00000000008622a4 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:233 + context = 0x2142900 + ret = <optimized out> + ret = -1295041038 + timeout = 4294967295 + timeout_ns = <optimized out> +#3 0x00000000008622a4 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497 + ret = -1295041038 + timeout = 4294967295 + timeout_ns = <optimized out> +#4 0x0000000000595dee in main_loop () at vl.c:1866 +#5 0x000000000041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644 + i = <optimized out> + snapshot = 0 + linux_boot = <optimized out> + initrd_filename = 0x0 + kernel_filename = <optimized out> + kernel_cmdline = <optimized out> + boot_order = 0x918f44 "cad" + boot_once = 0x0 + ds = <optimized out> + opts = <optimized out> + machine_opts = <optimized out> + icount_opts = <optimized out> + accel_opts = 0x0 + olist = <optimized out> + optind = 73 + optarg = 0x7ffdd6ee8f69 "timestamp=on" + loadvm = 0x0 + machine_class = 0x0 + cpu_model = 0x7ffdd6ee8854 "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"... + vga_model = 0x0 + qtest_chrdev = 0x0 + qtest_log = 0x0 + pid_file = <optimized out> + incoming = 0x7ffdd6ee8f0a "defer" + userconfig = <optimized out> + nographic = false + display_remote = <optimized out> + log_mask = <optimized out> + log_file = <optimized out> + trace_file = <optimized out> + maxram_size = 4294967296 + ram_slots = 0 + vmstate_dump_file = 0x0 + main_loop_err = 0x0 + err = 0x0 + list_data_dirs = false + dir = <optimized out> + dirs = <optimized out> + bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffdd6ee6630} +---Type <return> to continue, or q <return> to quit--- + __func__ = "main" + +Strace show this over and over +ppoll([{fd=6, events=POLLIN}, {fd=7, events=POLLIN}, {fd=9, events=POLLIN}, {fd=10, events=POLLIN}, {fd=21, events=POLLIN}, {fd=22, events=POLLIN}, {fd=23, events=POLLIN}, {fd=24, events=POLLIN}, {fd=27, events=POLLIN}], 9, {0, 594527977}, NULL, 8) = 0 (Timeout) + +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/10 -> anon_inode:[eventfd] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/21 -> socket:[42631161] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/22 -> socket:[42631165] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/23 -> socket:[42631167] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/24 -> socket:[42631168] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/27 -> socket:[42690422] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/6 -> anon_inode:[eventfd] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/7 -> anon_inode:[signalfd] +lrwx------ 1 root users 64 Dec 5 13:15 /proc/20170/fd/9 -> anon_inode:[eventfd] + +If I remote iothreads and writeback caching, it seems more reliable, but I can still get it to hang. + +This time the source server shows the VM as running, backtrace looks like: + +(gdb) bt full +#0 0x00007f27eab0028c in __lll_lock_wait () at /lib64/libpthread.so.0 +#1 0x00007f27eaaf9d35 in pthread_mutex_lock () at /lib64/libpthread.so.0 +#2 0x0000000000865419 in qemu_mutex_lock_impl (mutex=mutex@entry=0x115b8e0 <qemu_global_mutex>, file=file@entry=0x8fdf14 "/tmp/qemu-3.0.0/cpus.c", line=line@entry=1768) + at util/qemu-thread-posix.c:66 + err = <optimized out> + __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl" + __func__ = "qemu_mutex_lock_impl" +#3 0x0000000000477578 in qemu_mutex_lock_iothread () at /tmp/qemu-3.0.0/cpus.c:1768 +#4 0x00000000008622b0 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:236 + context = 0x1e72810 + ret = 1 + ret = 1 + timeout = 4294967295 + timeout_ns = <optimized out> +#5 0x00000000008622b0 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497 + ret = 1 + timeout = 4294967295 + timeout_ns = <optimized out> +#6 0x0000000000595dee in main_loop () at vl.c:1866 +#7 0x000000000041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644 + i = <optimized out> + snapshot = 0 + linux_boot = <optimized out> + initrd_filename = 0x0 + kernel_filename = <optimized out> + kernel_cmdline = <optimized out> + boot_order = 0x918f44 "cad" + boot_once = 0x0 + ds = <optimized out> + opts = <optimized out> + machine_opts = <optimized out> + icount_opts = <optimized out> + accel_opts = 0x0 + olist = <optimized out> + optind = 71 + optarg = 0x7fff5edcff69 "timestamp=on" + loadvm = 0x0 + machine_class = 0x0 + cpu_model = 0x7fff5edcf88a "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"... + vga_model = 0x0 + qtest_chrdev = 0x0 + qtest_log = 0x0 + pid_file = <optimized out> + incoming = 0x7fff5edcff0a "defer" + userconfig = <optimized out> + nographic = false + display_remote = <optimized out> + log_mask = <optimized out> + log_file = <optimized out> + trace_file = <optimized out> + maxram_size = 4294967296 + ram_slots = 0 + vmstate_dump_file = 0x0 + main_loop_err = 0x0 +---Type <return> to continue, or q <return> to quit--- + err = 0x0 + list_data_dirs = false + dir = <optimized out> + dirs = <optimized out> + bdo_queue = {sqh_first = 0x0, sqh_last = 0x7fff5edcd670} + __func__ = "main" + + +Dest server is paused, and looks like this: + +#0 0x00007f11c48bc3c1 in ppoll () at /lib64/libc.so.6 +#1 0x0000000000861659 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=timeout@entry=2999892383) at util/qemu-timer.c:334 + ts = {tv_sec = 2, tv_nsec = 999892383} +Python Exception <class 'gdb.error'> That operation is not available on integers of more than 8 bytes.: +#2 0x00000000008622a4 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:233 + context = 0x2342810 + ret = <optimized out> + ret = -1295074913 + timeout = 4294967295 + timeout_ns = <optimized out> +#3 0x00000000008622a4 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497 + ret = -1295074913 + timeout = 4294967295 + timeout_ns = <optimized out> +#4 0x0000000000595dee in main_loop () at vl.c:1866 +#5 0x000000000041f35d in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4644 + i = <optimized out> + snapshot = 0 + linux_boot = <optimized out> + initrd_filename = 0x0 + kernel_filename = <optimized out> + kernel_cmdline = <optimized out> + boot_order = 0x918f44 "cad" + boot_once = 0x0 + ds = <optimized out> + opts = <optimized out> + machine_opts = <optimized out> + icount_opts = <optimized out> + accel_opts = 0x0 + olist = <optimized out> + optind = 71 + optarg = 0x7ffe6b899f69 "timestamp=on" + loadvm = 0x0 + machine_class = 0x0 + cpu_model = 0x7ffe6b89988a "Skylake-Server-IBRS,ss=on,hypervisor=on,tsc_adjust=on,clflushopt=on,umip=on,pku=on,ssbd=on,xsaves=on,topoext=on,hv_time,hv_relaxed,hv_vapic,hv_spinlocks=0x1fff,hv_vpindex,hv_runtime,hv_synic,hv_stimer"... + vga_model = 0x0 + qtest_chrdev = 0x0 + qtest_log = 0x0 + pid_file = <optimized out> + incoming = 0x7ffe6b899f0a "defer" + userconfig = <optimized out> + nographic = false + display_remote = <optimized out> + log_mask = <optimized out> + log_file = <optimized out> + trace_file = <optimized out> + maxram_size = 4294967296 + ram_slots = 0 + vmstate_dump_file = 0x0 + main_loop_err = 0x0 + err = 0x0 + list_data_dirs = false + dir = <optimized out> + dirs = <optimized out> + bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffe6b8988e0} +---Type <return> to continue, or q <return> to quit--- + __func__ = "main" + +Honestly looks pretty much like the same bug.... + + +The QEMU project is currently considering to move its bug tracking to another system. For this we need to know which bugs are still valid and which could be closed already. Thus we are setting older bugs to "Incomplete" now. +If you still think this bug report here is valid, then please switch the state back to "New" within the next 60 days, otherwise this report will be marked as "Expired". Or mark it as "Fix Released" if the problem has been solved with a newer version of QEMU already. Thank you and sorry for the inconvenience. + +[Expired for QEMU because there has been no activity for 60 days.] + |