other: 0.902 KVM: 0.856 graphic: 0.834 performance: 0.832 permissions: 0.825 semantic: 0.823 debug: 0.818 vnc: 0.814 PID: 0.794 network: 0.788 device: 0.784 files: 0.774 socket: 0.769 boot: 0.696 While committing snapshot qemu crashes with SIGABRT Information: OS: Slackware64-Current Compiled with: gcc version 5.3.0 (GCC) / glibc 2.23 Compiled using: CFLAGS="-O2 -fPIC" \ CXXFLAGS="-O2 -fPIC" \ LDFLAGS="-L/usr/lib64" \ ./configure \ --prefix=/usr \ --sysconfdir=/etc \ --localstatedir=/var \ --libdir=/usr/lib64 \ --enable-spice \ --enable-kvm \ --enable-glusterfs \ --enable-libiscsi \ --enable-libusb \ --target-list=x86_64-softmmu,i386-softmmu \ --enable-debug Source: qemu-2.5.1.tar.bz2 Running as: /usr/bin/qemu-system-x86_64 -name test1,debug-threads=on -S -machine pc-1.1,accel=kvm,usb=off -m 4096 -realtime mlock=off -smp 2,sockets=2,cores=1,threads=1 -uuid 4b30ec13-6609-4a56-8731-d400c38189ef -no-user-config -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-4-test1/monitor.sock,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=localtime,clock=vm,driftfix=slew -global kvm-pit.lost_tick_policy=discard -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/datastore/vm/test1/test1.img,format=qcow2,if=none,id=drive-virtio-disk0 -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=2 -drive if=none,id=drive-ide0-1-0,readonly=on -device ide-cd,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0,bootindex=1 -netdev tap,fd=23,id=hostnet0,vhost=on,vhostfd=25 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:66:2e:0f,bus=pci.0,addr=0x3 -vnc 0.0.0.0:0 -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -msg timestamp=on File system: zfs v0.6.5.6 While running: virsh blockcommit test1 vda --active --pivot --verbose VM running very heavy IO load GDB reporting: #0 0x00007fd80132c3f8 
in raise () at /lib64/libc.so.6 #1 0x00007fd80132dffa in abort () at /lib64/libc.so.6 #2 0x00007fd801324c17 in __assert_fail_base () at /lib64/libc.so.6 #3 0x00007fd801324cc2 in () at /lib64/libc.so.6 #4 0x000055d9918d7572 in bdrv_replace_in_backing_chain (old=0x55d993ed9c10, new=0x55d9931ccc10) at block.c:2096 __PRETTY_FUNCTION__ = "bdrv_replace_in_backing_chain" #5 0x000055d991911869 in mirror_exit (job=0x55d993fef830, opaque=0x55d999bbefe0) at block/mirror.c:376 to_replace = 0x55d993ed9c10 s = 0x55d993fef830 data = 0x55d999bbefe0 replace_aio_context = src = 0x55d993ed9c10 #6 0x000055d9918da1dc in block_job_defer_to_main_loop_bh (opaque=0x55d9940ce850) at blockjob.c:481 data = 0x55d9940ce850 aio_context = 0x55d9931a2610 #7 0x000055d9918d014b in aio_bh_poll (ctx=ctx@entry=0x55d9931a2610) at async.c:92 bh = bhp = next = 0x55d99440f910 ret = 1 #8 0x000055d9918dc8c0 in aio_dispatch (ctx=0x55d9931a2610) at aio-posix.c:305 node = progress = false #9 0x000055d9918d000e in aio_ctx_dispatch (source=, callback=, user_data=) at async.c:231 ctx = #10 0x00007fd8037cf787 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0 #11 0x000055d9918db03b in main_loop_wait () at main-loop.c:211 context = 0x55d9931a3200 pfds = ret = 0 spin_counter = 1 ret = 0 timeout = 4294967295 timeout_ns = #12 0x000055d9918db03b in main_loop_wait (timeout=) at main-loop.c:256 ret = 0 spin_counter = 1 ret = 0 timeout = 4294967295 timeout_ns = #13 0x000055d9918db03b in main_loop_wait (nonblocking=) at main-loop.c:504 ret = 0 timeout = 4294967295 timeout_ns = #14 0x000055d991679cc4 in main () at vl.c:1923 nonblocking = last_io = 2 i = snapshot = linux_boot = initrd_filename = kernel_filename = kernel_cmdline = boot_order = boot_once = ds = cyls = heads = secs = translation = hda_opts = opts = machine_opts = icount_opts = olist = optind = 49 optarg = 0x7fffc6d27f43 "timestamp=on" loadvm = machine_class = 0x55d993194d10 cpu_model = vga_model = 0x0 qtest_chrdev = qtest_log = pid_file = incoming = 
defconfig = userconfig = false log_mask = log_file = trace_events = trace_file = maxram_size = ram_slots = vmstate_dump_file = main_loop_err = 0x0 err = 0x0 __func__ = "main" #15 0x000055d991679cc4 in main (argc=, argv=, envp=) at vl.c:4699 i = snapshot = linux_boot = initrd_filename = kernel_filename = kernel_cmdline = boot_order = boot_once = ds = cyls = heads = secs = translation = hda_opts = opts = machine_opts = icount_opts = olist = optind = 49 optarg = 0x7fffc6d27f43 "timestamp=on" loadvm = machine_class = 0x55d993194d10 cpu_model = vga_model = 0x0 qtest_chrdev = qtest_log = pid_file = incoming = defconfig = userconfig = false log_mask = log_file = trace_events = trace_file = maxram_size = ram_slots = vmstate_dump_file = main_loop_err = 0x0 err = 0x0 __func__ = "main" I can reproduce this at will, and can provide more information per a dev's request. On Wed, 04/13 23:18, Matthew Schumacher wrote: > I can reproduce this at will, and can provide more information per a > dev's request. Could you please try v2.6.0-rc1? 
Fam Sure, I did the same test and still got a SIGABRT, but the debug looks a little different: Backtrace: #0 0x00007f8f0d46a3f8 in raise () at /lib64/libc.so.6 #1 0x00007f8f0d46bffa in abort () at /lib64/libc.so.6 #2 0x00007f8f0d462c17 in __assert_fail_base () at /lib64/libc.so.6 #3 0x00007f8f0d462cc2 in () at /lib64/libc.so.6 #4 0x000055ff4ce33926 in mirror_run (s=0x55ff4fc00dd0) at block/mirror.c:335 next_sector = 31174784 next_chunk = 243553 nb_chunks = 29 end = 209715200 sectors_per_chunk = 128 source = 0x55ff4e1eb050 sector_num = 31171072 delay_ns = 0 delay_ns = 0 cnt = 157184 should_complete = s = 0x55ff4fc00dd0 data = bs = 0x55ff4e1eb050 sector_num = end = length = last_pause_ns = bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false} backing_filename = "\000\021" ret = n = 1048576 target_cluster_size = __PRETTY_FUNCTION__ = "mirror_run" #5 0x000055ff4ce33926 in mirror_run (opaque=0x55ff4fc00dd0) at block/mirror.c:613 delay_ns = 0 cnt = 157184 should_complete = s = 0x55ff4fc00dd0 data = bs = 0x55ff4e1eb050 sector_num = end = length = last_pause_ns = bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false} backing_filename = "\000\021" ret = n = 1048576 target_cluster_size = __PRETTY_FUNCTION__ = "mirror_run" #6 0x000055ff4ce9968a in coroutine_trampoline (i0=, i1=) at util/coroutine-ucontext.c:78 self = 0x55ff4f6c2c80 co = 0x55ff4f6c2c80 #7 0x00007f8f0d47f560 in __start_context () at /lib64/libc.so.6 #8 0x00007ffc759cb060 in () #9 0x0000000000000000 in () I get this in the log: qemu-system-x86_64: block/mirror.c:335: mirror_iteration: Assertion `hbitmap_next == next_sector' failed. 
The system was compiled like this: Install prefix /usr BIOS directory /usr/share/qemu binary directory /usr/bin library directory /usr/lib64 module directory /usr/lib64/qemu libexec directory /usr/libexec include directory /usr/include config directory /etc local state directory /var Manual directory /usr/share/man ELF interp prefix /usr/gnemul/qemu-%M Source path /tmp/qemu-2.6.0-rc1 C compiler cc Host C compiler cc C++ compiler c++ Objective-C compiler clang ARFLAGS rv CFLAGS -pthread -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -g -O2 -fPIC QEMU_CFLAGS -I/usr/include/pixman-1 -I$(SRC_PATH)/dtc/libfdt -DHAS_LIBSSH2_SFTP_FSYNC -fPIE -DPIE -m64 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fno-common -Wendif-labels -Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits -fstack-protector-strong -I/usr/include/p11-kit-1 -I/usr/include/libpng16 -I/usr/include/spice-server -I/usr/include/cacard -I/usr/include/nss -I/usr/include/nspr -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/pixman-1 -I/usr/include/spice-1 -I/usr/include/cacard -I/usr/include/nss -I/usr/include/nspr -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/include/libusb-1.0 LDFLAGS -Wl,--warn-common -Wl,-z,relro -Wl,-z,now -pie -m64 -g -L/usr/lib64 make make install install python python -B smbd /usr/sbin/smbd module support no host CPU x86_64 host big endian no target list x86_64-softmmu i386-softmmu tcg debug enabled yes gprof enabled no sparse enabled no strip binaries no profiler no static build no pixman system SDL support yes GTK support yes GTK GL support no GNUTLS support yes GNUTLS hash yes GNUTLS rnd yes libgcrypt no libgcrypt kdf no nettle yes (3.2) nettle kdf yes libtasn1 yes VTE support yes curses support yes 
virgl support no curl support yes mingw32 support no Audio drivers oss Block whitelist (rw) Block whitelist (ro) VirtFS support yes VNC support yes VNC SASL support yes VNC JPEG support yes VNC PNG support yes xen support no brlapi support no bluez support no Documentation yes PIE yes vde support no netmap support no Linux AIO support yes ATTR/XATTR support yes Install blobs yes KVM support yes RDMA support no TCG interpreter no fdt support yes preadv support yes fdatasync yes madvise yes posix_madvise yes sigev_thread_id yes uuid support yes libcap-ng support yes vhost-net support yes vhost-scsi support yes Trace backends log spice support yes (0.12.10/0.12.6) rbd support no xfsctl support yes smartcard support yes libusb yes usb net redir no OpenGL support yes OpenGL dmabufs yes libiscsi support yes libnfs support no build guest agent yes QGA VSS support no QGA w32 disk info no QGA MSI support no seccomp support no coroutine backend ucontext coroutine pool yes GlusterFS support yes Archipelago support no gcov gcov gcov enabled no TPM support yes libssh2 support yes TPM passthrough yes QOM debugging yes vhdx yes lzo support yes snappy support no bzip2 support yes NUMA host support no tcmalloc support no jemalloc support no avx2 optimization yes I'm going to try and put the VM on an EXT4 partition and see if I can duplicate the issue. It might be related to ZFS. 
It still fails with ext4: #0 0x00007fbaa12b33f8 in raise () at /lib64/libc.so.6 #1 0x00007fbaa12b4ffa in abort () at /lib64/libc.so.6 #2 0x00007fbaa12abc17 in __assert_fail_base () at /lib64/libc.so.6 #3 0x00007fbaa12abcc2 in () at /lib64/libc.so.6 #4 0x00005646b990f926 in mirror_run (s=0x5646bc50f480) at block/mirror.c:335 next_sector = 36659200 next_chunk = 286400 nb_chunks = 80 end = 209715200 sectors_per_chunk = 128 source = 0x5646bcb70000 sector_num = 36648960 delay_ns = 0 delay_ns = 0 cnt = 15360 should_complete = s = 0x5646bc50f480 data = bs = 0x5646bcb70000 sector_num = end = length = last_pause_ns = bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false} backing_filename = "\000" ret = n = 1048576 target_cluster_size = __PRETTY_FUNCTION__ = "mirror_run" #5 0x00005646b990f926 in mirror_run (opaque=0x5646bc50f480) at block/mirror.c:613 delay_ns = 0 cnt = 15360 should_complete = s = 0x5646bc50f480 data = bs = 0x5646bcb70000 sector_num = end = length = last_pause_ns = bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false} backing_filename = "\000" ret = n = 1048576 target_cluster_size = __PRETTY_FUNCTION__ = "mirror_run" #6 0x00005646b997568a in coroutine_trampoline (i0=, i1=) at util/coroutine-ucontext.c:78 self = 0x5646bc5115b0 co = 0x5646bc5115b0 #7 0x00007fbaa12c8560 in __start_context () at /lib64/libc.so.6 #8 0x00005646bd2b98b0 in () #9 0x0000000000000000 in () qemu-system-x86_64: block/mirror.c:335: mirror_iteration: Assertion `hbitmap_next == next_sector' failed. I can't seem to get stable snapshotting and blockpull with a loaded VM. 
Interestingly enough, the last command libvirt passes to qemu is: 2016-04-14 20:47:58.196+0000: 18932: debug : qemuMonitorJSONCommandWithFd:294 : Send command '{"execute":"query-block-jobs","id":"libvirt-69"}' for write with FD -1 2016-04-14 20:47:58.196+0000: 18932: info : qemuMonitorSend:1005 : QEMU_MONITOR_SEND_MSG: mon=0x7f1874001a30 msg={"execute":"query-block-jobs","id":"libvirt-69"} 2016-04-14 20:47:58.197+0000: 18929: info : qemuMonitorIOWrite:529 : QEMU_MONITOR_IO_WRITE: mon=0x7f1874001a30 buf={"execute":"query-block-jobs","id":"libvirt-69"} Odd that it would SIGABRT on a simple query-block-jobs. Even more interesting is that it crashes on the first or second or third snapshot/block-commit cycle when using EXT4, but would sometimes go for 30-40 cycles on ZFS. Any ideas? I'm certainly willing to test and help in any way I can. Thanks! I just tested master, and it does the same as 2.6.0-rc.... The 2.6.0 branch crashes much faster than 2.5.x. Hi Matthew, Thank you for your report! Could you try again with these two patches applied? Alternatively, you may fetch the resulting tree from https://github.com/XanClic/qemu.git, branch lp-1570134-pl (https://github.com/XanClic/qemu/archive/lp-1570134-pl.zip). Max And the second patch, because I'm either too stupid to make Launchpad attach two files to a single comment, or because Launchpad actually doesn't want me to for some reason. Thank you for working on this. Super helpful to have someone looking at this issue! With those two patches applied to 2.6.0-rc2 I still get the following: qemu-system-x86_64: block/mirror.c:342: mirror_iteration: Assertion `hbitmap_next == next_sector' failed. The line number confirms that qemu was patched before it was compiled. 
Here is the full backtrace: #0 0x00007f4e5aa213f8 in raise () at /lib64/libc.so.6 #1 0x00007f4e5aa22ffa in abort () at /lib64/libc.so.6 #2 0x00007f4e5aa19c17 in __assert_fail_base () at /lib64/libc.so.6 #3 0x00007f4e5aa19cc2 in () at /lib64/libc.so.6 #4 0x0000564d5afc1dab in mirror_run (s=0x564d5eb9c2d0) at block/mirror.c:342 hbitmap_next = next_sector = 29561984 next_chunk = 230953 nb_chunks = 4 end = 209715200 sectors_per_chunk = 128 source = 0x564d5d273b00 sector_num = 29561472 delay_ns = 0 delay_ns = 0 cnt = should_complete = s = 0x564d5eb9c2d0 data = bs = 0x564d5d273b00 sector_num = end = length = last_pause_ns = bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false} backing_filename = "\000\060" ret = n = 1048576 target_cluster_size = __PRETTY_FUNCTION__ = "mirror_run" #5 0x0000564d5afc1dab in mirror_run (opaque=0x564d5eb9c2d0) at block/mirror.c:619 delay_ns = 0 cnt = should_complete = s = 0x564d5eb9c2d0 data = bs = 0x564d5d273b00 sector_num = end = length = last_pause_ns = bdi = {cluster_size = 65536, vm_state_offset = 107374182400, is_dirty = false, unallocated_blocks_are_zero = true, can_write_zeroes_with_unmap = true, needs_compressed_writes = false} backing_filename = "\000\060" ret = n = 1048576 target_cluster_size = __PRETTY_FUNCTION__ = "mirror_run" #6 0x0000564d5b027e4a in coroutine_trampoline (i0=, i1=) at util/coroutine-ucontext.c:78 self = 0x564d5eacc520 co = 0x564d5eacc520 #7 0x00007f4e5aa36560 in __start_context () at /lib64/libc.so.6 #8 0x00007ffc151258c0 in () #9 0x0000000000000000 in () Hi Matthew, I now reproduced the issue myself, and it appears the second patch just missed one little thing. The attached patch (together with patch 1 from above) fixes the problem for me. 
(Also available from https://github.com/XanClic/qemu.git, branch lp-1570134-pl2; archive: https://github.com/XanClic/qemu/archive/lp-1570134-pl2.zip) While it was probably more or less noticed by chance (this is most likely a different issue than the one in 2.5.1), thank you for bringing this up. 2.6.0 is close to release, so it's good that this issue was still found. Max Max, Qemu still crashes for me, but the debug is again very different. When I attach to the qemu process from gdb, it is unable to provide a backtrace when it crashes. The log file is different too. Any ideas? qemu-system-x86_64: block.c:2307: bdrv_replace_in_backing_chain: Assertion `!bdrv_requests_pending(old)' failed. (gdb) attach 5563 Attaching to process 5563 Reading symbols from /usr/bin/qemu-system-x86_64...cdone. oReading symbols from /usr/lib64/libepoxy.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libdrm.so.2...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgbm.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libX11.so.6...n(no debugging symbols found)...done. Reading symbols from /usr/lib64/libz.so.1...(no debugging symbols found)...done. Reading symbols from /lib64/libaio.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libiscsi.so.4...done. Reading symbols from /usr/lib64/libcurl.so.4...(no debugging symbols found)...done. Reading symbols from /lib64/libacl.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgfapi.so.0...done. Reading symbols from /usr/lib64/libglusterfs.so.0...done. Reading symbols from /usr/lib64/libgfrpc.so.0...done. Reading symbols from /usr/lib64/libgfxdr.so.0...done. Reading symbols from /lib64/libuuid.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libssh2.so.1...done. Reading symbols from /lib64/libbz2.so.1...(no debugging symbols found)...done. 
Reading symbols from /usr/lib64/libpixman-1.so.0...(no debugging symbols found)...done. Reading symbols from /lib64/libutil.so.1...(no debugging symbols found)...done. Reading symbols from /lib64/libncurses.so.5...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libpng16.so.16...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libjpeg.so.62...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libsasl2.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libSDL-1.2.so.0...(no debugging symbols found)...done. Reading symbols from /lib64/libpthread.so.0...(no debugging symbols found)...done. [New LWP 5588] [New LWP 5587] [New LWP 5586] [New LWP 5585] [New LWP 5584] [New LWP 5583] [New LWP 5582] [New LWP 5581] [New LWP 5580] [New LWP 5579] [New LWP 5578] [New LWP 5577] [New LWP 5576] [New LWP 5575] [New LWP 5574] [New LWP 5573] [New LWP 5572] [New LWP 5571] [New LWP 5570] [New LWP 5568] [New LWP 5567] [New LWP 5566] [New LWP 5564] [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". Reading symbols from /usr/lib64/libvte.so.9...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgtk-x11-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgdk-x11-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libpangocairo-1.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libatk-1.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgdk_pixbuf-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libpangoft2-1.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libpango-1.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libfontconfig.so.1...(no debugging symbols found)...done. 
Reading symbols from /usr/lib64/libfreetype.so.6...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgio-2.0.so.0...t(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgobject-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libglib-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libcairo.so.2...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libXext.so.6...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libnettle.so.6...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgnutls.so.30...(no debugging symbols found)...done. Reading symbols from /usr/lib64/liblzo2.so.2...done. Reading symbols from /usr/lib64/libspice-server.so.1...done. Reading symbols from /usr/lib64/libcacard.so.0...done. Reading symbols from /usr/lib64/libusb-1.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgthread-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /lib64/librt.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libstdc++.so.6...(no debugging symbols found)...done. Reading symbols from /lib64/libm.so.6...i(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgcc_s.so.1...(no debugging symbols found)...done. Reading symbols from /lib64/libc.so.6...(no debugging symbols found)...done. Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done. Reading symbols from /lib64/libdl.so.2...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libexpat.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libxcb.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libXau.so.6...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libXdmcp.so.6...(no debugging symbols found)...done. 
Reading symbols from /usr/lib64/libgcrypt.so.20...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgpg-error.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libidn.so.11...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libssl.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libcrypto.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/liblber-2.4.so.2...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libldap-2.4.so.2...(no debugging symbols found)...done. Reading symbols from /lib64/libattr.so.1...(no debugging symbols found)...done. Reading symbols from /lib64/libresolv.so.2...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libXrandr.so.2...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libXrender.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libvga.so.1...done. Reading symbols from /usr/lib64/../lib64/libgmodule-2.0.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libffi.so.6...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libharfbuzz.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libEGL.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-shm.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libGL.so.1...n(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libglapi.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXdamage.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXfixes.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libX11-xcb.so.1...(no debugging symbols found)...done. 
Reading symbols from /usr/lib64/../lib64/libxcb-glx.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-dri2.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-dri3.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-present.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-randr.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-xfixes.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-render.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-shape.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxcb-sync.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libxshmfence.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXxf86vm.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXinerama.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXi.so.6...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXcursor.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/../lib64/libXcomposite.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libp11-kit.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libhogweed.so.4...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgmp.so.10...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libnss3.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libsmime3.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libssl3.so...(no debugging symbols found)...done. 
Reading symbols from /usr/lib64/libsoftokn3.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libnssutil3.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libplds4.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libplc4.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libnspr4.so...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libsqlite3.so.0...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libicui18n.so.56...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libicuuc.so.56...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libicudata.so.56...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libcelt051.so.0...done. Reading symbols from /usr/lib64/liblz4.so.1...(no debugging symbols found)...done. Reading symbols from /lib64/libudev.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/libsasldb.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/libgdbm.so.4...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/libotp.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/libdigestmd5.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/libcrammd5.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/liblogin.so.3...(no debugging symbols found)...done. Reading symbols from /lib64/libcrypt.so.1...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/libplain.so.3...(no debugging symbols found)...done. Reading symbols from /usr/lib64/sasl2/libscram.so.3...(no debugging symbols found)...done. 0x00007f12852f83d1 in ppoll () from /lib64/libc.so.6 (gdb) continue Continuing. 
[Thread 0x7f115b7fe700 (LWP 5576) exited] [Thread 0x7f127aa76700 (LWP 5566) exited] [Thread 0x7f1159ffb700 (LWP 5579) exited] [Thread 0x7f115affd700 (LWP 5577) exited] [Thread 0x7f116a0e2700 (LWP 5571) exited] [Thread 0x7f1158ff9700 (LWP 5581) exited] [Thread 0x7f11690e0700 (LWP 5573) exited] [Thread 0x7f11597fa700 (LWP 5580) exited] [Thread 0x7f115bfff700 (LWP 5575) exited] [Thread 0x7f11688df700 (LWP 5574) exited] [Thread 0x7f115a7fc700 (LWP 5578) exited] [Thread 0x7f11698e1700 (LWP 5572) exited] [New Thread 0x7f11698e1700 (LWP 5631)] [New Thread 0x7f115a7fc700 (LWP 5632)] [New Thread 0x7f11688df700 (LWP 5633)] [New Thread 0x7f115bfff700 (LWP 5634)] [New Thread 0x7f127aa76700 (LWP 5635)] [New Thread 0x7f116a0e2700 (LWP 5636)] [New Thread 0x7f11690e0700 (LWP 5637)] [New Thread 0x7f115b7fe700 (LWP 5638)] [New Thread 0x7f115affd700 (LWP 5639)] [New Thread 0x7f1159ffb700 (LWP 5640)] [New Thread 0x7f11597fa700 (LWP 5641)] [New Thread 0x7f1158ff9700 (LWP 5642)] [New Thread 0x7f1117fff700 (LWP 5643)] [New Thread 0x7f11177fe700 (LWP 5644)] [New Thread 0x7f1116ffd700 (LWP 5645)] [New Thread 0x7f11167fc700 (LWP 5646)] [New Thread 0x7f1115ffb700 (LWP 5647)] [New Thread 0x7f11157fa700 (LWP 5648)] [New Thread 0x7f1114ff9700 (LWP 5649)] [New Thread 0x7f11147f8700 (LWP 5650)] [New Thread 0x7f1113ff7700 (LWP 5651)] [New Thread 0x7f11137f6700 (LWP 5652)] [New Thread 0x7f1112ff5700 (LWP 5653)] Thread 1 "qemu-system-x86" received signal SIGABRT, Aborted. 0x00007f12852323f8 in raise () from /lib64/libc.so.6 (gdb) Continuing. Couldn't get registers: No such process. Couldn't get registers: No such process. Couldn't get registers: No such process. (gdb) Continuing. Couldn't get registers: No such process. 
(gdb) [Thread 0x7f1112ff5700 (LWP 5653) exited] [Thread 0x7f11137f6700 (LWP 5652) exited] [Thread 0x7f1113ff7700 (LWP 5651) exited] [Thread 0x7f11147f8700 (LWP 5650) exited] [Thread 0x7f1114ff9700 (LWP 5649) exited] [Thread 0x7f11157fa700 (LWP 5648) exited] [Thread 0x7f1115ffb700 (LWP 5647) exited] [Thread 0x7f1116ffd700 (LWP 5645) exited] [Thread 0x7f11177fe700 (LWP 5644) exited] [Thread 0x7f1117fff700 (LWP 5643) exited] [Thread 0x7f1158ff9700 (LWP 5642) exited] [Thread 0x7f11597fa700 (LWP 5641) exited] [Thread 0x7f1159ffb700 (LWP 5640) exited] [Thread 0x7f115affd700 (LWP 5639) exited] [Thread 0x7f115b7fe700 (LWP 5638) exited] [Thread 0x7f11690e0700 (LWP 5637) exited] [Thread 0x7f116a0e2700 (LWP 5636) exited] [Thread 0x7f127aa76700 (LWP 5635) exited] [Thread 0x7f115bfff700 (LWP 5634) exited] [Thread 0x7f11688df700 (LWP 5633) exited] [Thread 0x7f115a7fc700 (LWP 5632) exited] [Thread 0x7f11698e1700 (LWP 5631) exited] [Thread 0x7f1134ff9700 (LWP 5588) exited] [Thread 0x7f11357fa700 (LWP 5587) exited] [Thread 0x7f1135ffb700 (LWP 5586) exited] [Thread 0x7f11367fc700 (LWP 5585) exited] [Thread 0x7f1136ffd700 (LWP 5584) exited] [Thread 0x7f11377fe700 (LWP 5583) exited] [Thread 0x7f1137fff700 (LWP 5582) exited] [Thread 0x7f1272dff700 (LWP 5570) exited] [Thread 0x7f1278961700 (LWP 5568) exited] [Thread 0x7f1279162700 (LWP 5567) exited] [Thread 0x7f127b277700 (LWP 5564) exited] [Thread 0x7f128d35cb00 (LWP 5563) exited] Continuing. Cannot execute this command without a live selected thread. (gdb) Continuing. Cannot execute this command without a live selected thread. (gdb) Continuing. Cannot execute this command without a live selected thread. (gdb) On Wed, 04/20 22:03, Max Reitz wrote: > On 20.04.2016 20:09, Max Reitz wrote: > > On 20.04.2016 02:03, Matthew Schumacher wrote: > >> Max, > >> > >> Qemu still crashes for me, but the debug is again very different. When > >> I attach to the qemu process from gdb, it is unable to provide a > >> backtrace when it crashes. 
The log file is different too. Any ideas? > >> > >> qemu-system-x86_64: block.c:2307: bdrv_replace_in_backing_chain: > >> Assertion `!bdrv_requests_pending(old)' failed. > > > > This message is exactly the same as you saw in 2.5.1, so I guess we've > > at least averted a regression in 2.6.0. > > I get the same message in 2.5.0, in 2.4.0 it's "Co-routine re-entered > recursively". 2.3.0 works fine. > > Bisecting the regression between 2.3.0 and 2.4.0 interestingly yields > 48ac0a4df84662f as the problematic commit, but I can't imagine that this > is the root issue. The effective change it brings is that for active > commits, the buf_size is no longer the same as the granularity, but the > default mirror buf_size instead. > > When forcing buf_size to the granularity, the issue first appears with > commit 3f09bfbc7bee812 (after 2.4.0, before 2.5.0), which is much less > surprising, because this is the one that introduced the assertion in the > first place. > > However, I still don't think the assertion is the problem but the fact > that the guest device can still send requests after bdrv_drained_begin(). Thanks for debugging this. bdrv_drained_begin isn't effective because the guest notifier handler is not registered as "external": virtio_queue_set_host_notifier_fd_handler event_notifier_set_handler qemu_set_fd_handler aio_set_fd_handler(ctx, fd, is_external, /* false */ ...) is_external SHOULD be true here. On Thu, 04/21 08:34, Fam Zheng wrote: > On Wed, 04/20 22:03, Max Reitz wrote: > > On 20.04.2016 20:09, Max Reitz wrote: > > > On 20.04.2016 02:03, Matthew Schumacher wrote: > > >> Max, > > >> > > >> Qemu still crashes for me, but the debug is again very different. When > > >> I attach to the qemu process from gdb, it is unable to provide a > > >> backtrace when it crashes. The log file is different too. Any ideas? > > >> > > >> qemu-system-x86_64: block.c:2307: bdrv_replace_in_backing_chain: > > >> Assertion `!bdrv_requests_pending(old)' failed. 
> > > > > > This message is exactly the same as you saw in 2.5.1, so I guess we've > > > at least averted a regression in 2.6.0. > > > > I get the same message in 2.5.0, in 2.4.0 it's "Co-routine re-entered > > recursively". 2.3.0 works fine. > > > > Bisecting the regression between 2.3.0 and 2.4.0 interestingly yields > > 48ac0a4df84662f as the problematic commit, but I can't imagine that this > > is the root issue. The effective change it brings is that for active > > commits, the buf_size is no longer the same as the granularity, but the > > default mirror buf_size instead. > > > > When forcing buf_size to the granularity, the issue first appears with > > commit 3f09bfbc7bee812 (after 2.4.0, before 2.5.0), which is much less > > surprising, because this is the one that introduced the assertion in the > > first place. > > > > However, I still don't think the assertion is the problem but the fact > > that the guest device can still send requests after bdrv_drained_begin(). > > Thanks for debugging this. > > bdrv_drained_begin isn't effective because the guest notifier handler is not > registered as "external": > > virtio_queue_set_host_notifier_fd_handler > event_notifier_set_handler > qemu_set_fd_handler > aio_set_fd_handler(ctx, fd, > is_external, /* false */ > ...) > > > is_external SHOULD be true here. 
> This patch survives the reproducer I have on top of master (also submitted to qemu-devel for 2.6): --- diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f745c4a..002c2c6 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1829,10 +1829,11 @@ void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, bool set_handler) { if (assign && set_handler) { - event_notifier_set_handler(&vq->host_notifier, - virtio_queue_host_notifier_read); + aio_set_event_notifier(qemu_get_aio_context(), &vq->host_notifier, + true, virtio_queue_host_notifier_read); } else { - event_notifier_set_handler(&vq->host_notifier, NULL); + aio_set_event_notifier(qemu_get_aio_context(), &vq->host_notifier, + true, NULL); } if (!assign) { /* Test and clear notifier before after disabling event, On 20 April 2016 at 19:09, Max Reitz