From dee4dcba78baf712cab403d47d9db319ab7f95d6 Mon Sep 17 00:00:00 2001 From: Christian Krinitsin Date: Thu, 3 Jul 2025 19:39:53 +0200 Subject: restructure results --- .../classifier/zero-shot/014/peripherals/56937788 | 371 +++++ .../classifier/zero-shot/014/peripherals/57756589 | 1448 ++++++++++++++++++++ .../classifier/zero-shot/014/peripherals/60339453 | 88 ++ 3 files changed, 1907 insertions(+) create mode 100644 results/classifier/zero-shot/014/peripherals/56937788 create mode 100644 results/classifier/zero-shot/014/peripherals/57756589 create mode 100644 results/classifier/zero-shot/014/peripherals/60339453 (limited to 'results/classifier/zero-shot/014/peripherals') diff --git a/results/classifier/zero-shot/014/peripherals/56937788 b/results/classifier/zero-shot/014/peripherals/56937788 new file mode 100644 index 00000000..82617706 --- /dev/null +++ b/results/classifier/zero-shot/014/peripherals/56937788 @@ -0,0 +1,371 @@ +peripherals: 0.807 +user-level: 0.794 +risc-v: 0.773 +hypervisor: 0.765 +TCG: 0.760 +KVM: 0.755 +vnc: 0.743 +mistranslation: 0.735 +VMM: 0.731 +virtual: 0.730 +ppc: 0.728 +debug: 0.723 +graphic: 0.720 +operating system: 0.713 +register: 0.706 +semantic: 0.705 +device: 0.697 +i386: 0.694 +x86: 0.693 +performance: 0.692 +permissions: 0.685 +files: 0.680 +arm: 0.665 +assembly: 0.638 +boot: 0.636 +network: 0.633 +alpha: 0.631 +architecture: 0.627 +PID: 0.620 +socket: 0.613 +kernel: 0.594 + +[Qemu-devel] [Bug] virtio-blk: qemu will crash if hotplug virtio-blk device failed + +I found that hotplug virtio-blk device will lead to qemu crash. + +Re-production steps: + +1. Run VM named vm001 + +2. Create a virtio-blk.xml which contains wrong configurations: + + + + + + +3. 
Run command : virsh attach-device vm001 vm001 + +Libvirt will return err msg: + +error: Failed to attach device from blk-scsi.xml + +error: internal error: unable to execute QEMU command 'device_add': Please set +scsi=off for virtio-blk devices in order to use virtio 1.0 + +it means hotplug virtio-blk device failed. + +4. Suspend or shutdown VM will leads to qemu crash + + + +from gdb: + + +(gdb) bt +#0 object_get_class (address@hidden) at qom/object.c:750 +#1 0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, +running=0, state=) at +/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203 +#2 0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at +vl.c:1685 +#3 0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at +/mnt/sdb/lzc/code/open/qemu/cpus.c:941 +#4 vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807 +#5 0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102 +#6 0x00007f9a7262c70a in qmp_marshal_stop (args=, +ret=, errp=0x7ffe63e255d8) at qmp-marshal.c:5854 +#7 0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, +request=0x7f9a76510120, cmds=0x7f9a72ee7980 ) at +qapi/qmp-dispatch.c:104 +#8 qmp_dispatch (cmds=0x7f9a72ee7980 , address@hidden) at +qapi/qmp-dispatch.c:131 +#9 0x00007f9a725288d5 in handle_qmp_command (parser=, +tokens=) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852 +#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, +input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at +qobject/json-streamer.c:105 +#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', +address@hidden) at qobject/json-lexer.c:323 +#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, +buffer=, size=) at qobject/json-lexer.c:373 +#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=, +buffer=, size=) at qobject/json-streamer.c:124 +#14 0x00007f9a7252722e in monitor_qmp_read (opaque=, +buf=, size=) at +/mnt/sdb/lzc/code/open/qemu/monitor.c:3894 +#15 
0x00007f9a7284ee1b in tcp_chr_read (chan=, cond=, opaque=) at chardev/char-socket.c:441 +#16 0x00007f9a6e03e99a in g_main_context_dispatch () from +/usr/lib64/libglib-2.0.so.0 +#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214 +#18 os_host_main_loop_wait (timeout=) at util/main-loop.c:261 +#19 main_loop_wait (address@hidden) at util/main-loop.c:515 +#20 0x00007f9a724e7547 in main_loop () at vl.c:1999 +#21 main (argc=, argv=, envp=) at +vl.c:4877 + +Problem happens in virtio_vmstate_change which is called by vm_state_notify, +static void virtio_vmstate_change(void *opaque, int running, RunState state) +{ + VirtIODevice *vdev = opaque; + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); + vdev->vm_running = running; + + if (backend_run) { + virtio_set_status(vdev, vdev->status); + } + + if (k->vmstate_change) { + k->vmstate_change(qbus->parent, backend_run); + } + + if (!backend_run) { + virtio_set_status(vdev, vdev->status); + } +} + +Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. +virtio_vmstate_change is added to the list vm_change_state_head at +virtio_blk_device_realize(virtio_init), +but after hotplug virtio-blk failed, virtio_vmstate_change will not be removed +from vm_change_state_head. + + +I apply a patch as follews: + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 5884ce3..ea532dc 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, Error +**errp) + virtio_bus_device_plugged(vdev, &err); + if (err != NULL) { + error_propagate(errp, err); ++ vdc->unrealize(dev, NULL); + return; + } + +On Tue, Oct 31, 2017 at 05:19:08AM +0000, linzhecheng wrote: +> +I found that hotplug virtio-blk device will lead to qemu crash. +The author posted a patch in a separate email thread. 
Please see +"[PATCH] fix: unrealize virtio device if we fail to hotplug it". + +> +Re-production steps: +> +> +1. Run VM named vm001 +> +> +2. Create a virtio-blk.xml which contains wrong configurations: +> + +> + +> + +> + +> + +> +> +3. Run command : virsh attach-device vm001 vm001 +> +> +Libvirt will return err msg: +> +> +error: Failed to attach device from blk-scsi.xml +> +> +error: internal error: unable to execute QEMU command 'device_add': Please +> +set scsi=off for virtio-blk devices in order to use virtio 1.0 +> +> +it means hotplug virtio-blk device failed. +> +> +4. Suspend or shutdown VM will leads to qemu crash +> +> +> +> +from gdb: +> +> +> +(gdb) bt +> +#0 object_get_class (address@hidden) at qom/object.c:750 +> +#1 0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, +> +running=0, state=) at +> +/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203 +> +#2 0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at +> +vl.c:1685 +> +#3 0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at +> +/mnt/sdb/lzc/code/open/qemu/cpus.c:941 +> +#4 vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807 +> +#5 0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102 +> +#6 0x00007f9a7262c70a in qmp_marshal_stop (args=, +> +ret=, errp=0x7ffe63e255d8) at qmp-marshal.c:5854 +> +#7 0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, +> +request=0x7f9a76510120, cmds=0x7f9a72ee7980 ) at +> +qapi/qmp-dispatch.c:104 +> +#8 qmp_dispatch (cmds=0x7f9a72ee7980 , address@hidden) at +> +qapi/qmp-dispatch.c:131 +> +#9 0x00007f9a725288d5 in handle_qmp_command (parser=, +> +tokens=) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852 +> +#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, +> +input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at +> +qobject/json-streamer.c:105 +> +#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', +> +address@hidden) at 
qobject/json-lexer.c:323 +> +#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, +> +buffer=, size=) at qobject/json-lexer.c:373 +> +#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=, +> +buffer=, size=) at qobject/json-streamer.c:124 +> +#14 0x00007f9a7252722e in monitor_qmp_read (opaque=, +> +buf=, size=) at +> +/mnt/sdb/lzc/code/open/qemu/monitor.c:3894 +> +#15 0x00007f9a7284ee1b in tcp_chr_read (chan=, cond= +out>, opaque=) at chardev/char-socket.c:441 +> +#16 0x00007f9a6e03e99a in g_main_context_dispatch () from +> +/usr/lib64/libglib-2.0.so.0 +> +#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214 +> +#18 os_host_main_loop_wait (timeout=) at util/main-loop.c:261 +> +#19 main_loop_wait (address@hidden) at util/main-loop.c:515 +> +#20 0x00007f9a724e7547 in main_loop () at vl.c:1999 +> +#21 main (argc=, argv=, envp=) +> +at vl.c:4877 +> +> +Problem happens in virtio_vmstate_change which is called by vm_state_notify, +> +static void virtio_vmstate_change(void *opaque, int running, RunState state) +> +{ +> +VirtIODevice *vdev = opaque; +> +BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); +> +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +> +bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); +> +vdev->vm_running = running; +> +> +if (backend_run) { +> +virtio_set_status(vdev, vdev->status); +> +} +> +> +if (k->vmstate_change) { +> +k->vmstate_change(qbus->parent, backend_run); +> +} +> +> +if (!backend_run) { +> +virtio_set_status(vdev, vdev->status); +> +} +> +} +> +> +Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. +> +virtio_vmstate_change is added to the list vm_change_state_head at +> +virtio_blk_device_realize(virtio_init), +> +but after hotplug virtio-blk failed, virtio_vmstate_change will not be +> +removed from vm_change_state_head. 
+> +> +> +I apply a patch as follews: +> +> +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +> +index 5884ce3..ea532dc 100644 +> +--- a/hw/virtio/virtio.c +> ++++ b/hw/virtio/virtio.c +> +@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, +> +Error **errp) +> +virtio_bus_device_plugged(vdev, &err); +> +if (err != NULL) { +> +error_propagate(errp, err); +> ++ vdc->unrealize(dev, NULL); +> +return; +> +} +signature.asc +Description: +PGP signature + diff --git a/results/classifier/zero-shot/014/peripherals/57756589 b/results/classifier/zero-shot/014/peripherals/57756589 new file mode 100644 index 00000000..5891931d --- /dev/null +++ b/results/classifier/zero-shot/014/peripherals/57756589 @@ -0,0 +1,1448 @@ +peripherals: 0.875 +hypervisor: 0.863 +mistranslation: 0.861 +register: 0.858 +architecture: 0.856 +device: 0.853 +vnc: 0.851 +virtual: 0.845 +permissions: 0.842 +assembly: 0.841 +performance: 0.839 +ppc: 0.838 +semantic: 0.835 +operating system: 0.835 +TCG: 0.833 +VMM: 0.833 +arm: 0.828 +boot: 0.827 +user-level: 0.826 +graphic: 0.824 +network: 0.822 +socket: 0.820 +PID: 0.819 +KVM: 0.817 +kernel: 0.817 +files: 0.816 +x86: 0.814 +alpha: 0.810 +debug: 0.803 +i386: 0.782 +risc-v: 0.755 + +[Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + +amost like wiki,but panic in Primary Node. + + + + +setp: + +1 + +Primary Node. 
+ +x86_64-softmmu/qemu-system-x86_64 -enable-kvm -boot c -m 2048 -smp 2 -qmp stdio +-vnc :7 -name primary -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb +-usbdevice tablet\ + + -drive +if=virtio,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1, + + +children.0.file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,children.0.driver=qcow2 + -S \ + + -netdev +tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \ + + -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \ + + -netdev +tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \ + + -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 \ + + -chardev socket,id=mirror0,host=9.61.1.8,port=9003,server,nowait -chardev +socket,id=compare1,host=9.61.1.8,port=9004,server,nowait \ + + -chardev socket,id=compare0,host=9.61.1.8,port=9001,server,nowait -chardev +socket,id=compare0-0,host=9.61.1.8,port=9001 \ + + -chardev socket,id=compare_out,host=9.61.1.8,port=9005,server,nowait \ + + -chardev socket,id=compare_out0,host=9.61.1.8,port=9005 \ + + -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \ + + -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out +-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \ + + -object +colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 + +2 Second node: + +x86_64-softmmu/qemu-system-x86_64 -boot c -m 2048 -smp 2 -qmp stdio -vnc :7 +-name secondary -enable-kvm -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb +-usbdevice tablet\ + + -drive +if=none,id=colo-disk0,file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,driver=qcow2,node-name=node0 + \ + + -drive +if=virtio,id=active-disk0,driver=replication,mode=secondary,file.driver=qcow2,top-id=active-disk0,file.file.filename=/mnt/ramfstest/active_disk.img,file.backing.driver=qcow2,file.backing.file.filename=/mnt/ramfstest/hidden_disk.img,file.backing.backing=colo-disk0 + \ + 
+ -netdev +tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \ + + -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \ + + -netdev +tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \ + + -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 -chardev +socket,id=red0,host=9.61.1.8,port=9003 \ + + -chardev socket,id=red1,host=9.61.1.8,port=9004 \ + + -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \ + + -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \ + + -object filter-rewriter,id=rew0,netdev=hn0,queue=all -incoming tcp:0:8888 + +3 Secondary node: + +{'execute':'qmp_capabilities'} + +{ 'execute': 'nbd-server-start', + + 'arguments': {'addr': {'type': 'inet', 'data': {'host': '9.61.1.7', 'port': +'8889'} } } + +} + +{'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk0', 'writable': +true } } + +4:Primary Node: + +{'execute':'qmp_capabilities'} + + +{ 'execute': 'human-monitor-command', + + 'arguments': {'command-line': 'drive_add -n buddy +driver=replication,mode=primary,file.driver=nbd,file.host=9.61.1.7,file.port=8889,file.export=colo-disk0,node-name=node0'}} + +{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': +'node0' } } + +{ 'execute': 'migrate-set-capabilities', + + 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } +] } } + +{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:9.61.1.7:8888' } } + + + + +then can see two runing VMs, whenever you make changes to PVM, SVM will be +synced. + + + + +5:Primary Node: + +echo c > /proc/sysrq-trigger + + + + +6:Secondary node: + +{ 'execute': 'nbd-server-stop' } + +{ "execute": "x-colo-lost-heartbeat" } + + + + +then can see the Secondary node hang at recvmsg recvmsg . 
+ + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人:王广10165992 address@hidden +抄送人: address@hidden address@hidden +日 期 :2017å¹´03月21日 16:27 +主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +Hi, + +On 2017/3/21 16:10, address@hidden wrote: +> Thank you。 +> +> I have test aready。 +> +> When the Primary Node panic,the Secondary Node qemu hang at the same place。 +> +> Incorrding +http://wiki.qemu-project.org/Features/COLO +,kill Primary Node qemu +will not produce the problem,but Primary Node panic can。 +> +> I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +> +> + +Yes, you are right, when we do failover for primary/secondary VM, we will +shutdown the related +fd in case it is stuck in the read/write fd. + +It seems that you didn't follow the above introduction exactly to do the test. +Could you +share your test procedures ? Especially the commands used in the test. + +Thanks, +Hailiang + +> when failover,channel_shutdown could not shut down the channel. +> +> +> so the colo_process_incoming_thread will hang at recvmsg. +> +> +> I test a patch: +> +> +> diff --git a/migration/socket.c b/migration/socket.c +> +> +> index 13966f1..d65a0ea 100644 +> +> +> --- a/migration/socket.c +> +> +> +++ b/migration/socket.c +> +> +> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +> +> +> } +> +> +> +> +> +> trace_migration_socket_incoming_accepted() +> +> +> +> +> +> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +> +> +> + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) +> +> +> migration_channel_process_incoming(migrate_get_current(), +> +> +> QIO_CHANNEL(sioc)) +> +> +> object_unref(OBJECT(sioc)) +> +> +> +> +> My test will not hang any more. 
+> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> 原始邮件 +> +> +> +> 发件人: address@hidden +> 收件人:王广10165992 address@hidden +> 抄送人: address@hidden address@hidden +> 日 期 :2017å¹´03月21日 15:58 +> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang +> +> +> +> +> +> Hi,Wang. +> +> You can test this branch: +> +> +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +> +> and please follow wiki ensure your own configuration correctly. +> +> +http://wiki.qemu-project.org/Features/COLO +> +> +> Thanks +> +> Zhang Chen +> +> +> On 03/21/2017 03:27 PM, address@hidden wrote: +> > +> > hi. +> > +> > I test the git qemu master have the same problem. +> > +> > (gdb) bt +> > +> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +> > +> > #1 0x00007f658e4aa0c2 in qio_channel_read +> > (address@hidden, address@hidden "", +> > address@hidden, address@hidden) at io/channel.c:114 +> > +> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +> > migration/qemu-file-channel.c:78 +> > +> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +> > migration/qemu-file.c:295 +> > +> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +> > address@hidden) at migration/qemu-file.c:555 +> > +> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +> > migration/qemu-file.c:568 +> > +> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +> > migration/qemu-file.c:648 +> > +> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +> > address@hidden) at migration/colo.c:244 +> > +> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +> > out>, address@hidden, +> > address@hidden) +> > +> > at migration/colo.c:264 +> > +> > #9 0x00007f658e3e740e in colo_process_incoming_thread +> > (opaque=0x7f658eb30360 <mis_current.31286>) at 
migration/colo.c:577 +> > +> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +> > +> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +> > +> > (gdb) p ioc->name +> > +> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +> > +> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +> > +> > $3 = 0 +> > +> > +> > (gdb) bt +> > +> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +> > +> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +> > gmain.c:3054 +> > +> > #2 g_main_context_dispatch (context=<optimized out>, +> > address@hidden) at gmain.c:3630 +> > +> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +> > +> > #4 os_host_main_loop_wait (timeout=<optimized out>) at +> > util/main-loop.c:258 +> > +> > #5 main_loop_wait (address@hidden) at +> > util/main-loop.c:506 +> > +> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +> > +> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +> > out>) at vl.c:4709 +> > +> > (gdb) p ioc->features +> > +> > $1 = 6 +> > +> > (gdb) p ioc->name +> > +> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +> > +> > +> > May be socket_accept_incoming_migration should +> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +> > +> > +> > thank you. +> > +> > +> > +> > +> > +> > 原始邮件 +> > address@hidden +> > address@hidden +> > address@hidden@huawei.com> +> > *日 期 :*2017å¹´03月16日 14:46 +> > *主 题 :**Re: [Qemu-devel] COLO failover hang* +> > +> > +> > +> > +> > On 03/15/2017 05:06 PM, wangguang wrote: +> > > am testing QEMU COLO feature described here [QEMU +> > > Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +> > > +> > > When the Primary Node panic,the Secondary Node qemu hang. +> > > hang at recvmsg in qio_channel_socket_readv. 
+> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": +> > > "x-colo-lost-heartbeat" } in Secondary VM's +> > > monitor,the Secondary Node qemu still hang at recvmsg . +> > > +> > > I found that the colo in qemu is not complete yet. +> > > Do the colo have any plan for development? +> > +> > Yes, We are developing. You can see some of patch we pushing. +> > +> > > Has anyone ever run it successfully? Any help is appreciated! +> > +> > In our internal version can run it successfully, +> > The failover detail you can ask Zhanghailiang for help. +> > Next time if you have some question about COLO, +> > please cc me and zhanghailiang address@hidden +> > +> > +> > Thanks +> > Zhang Chen +> > +> > +> > > +> > > +> > > +> > > centos7.2+qemu2.7.50 +> > > (gdb) bt +> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>, +> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) at +> > > io/channel-socket.c:497 +> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> > > address@hidden "", address@hidden, +> > > address@hidden) at io/channel.c:97 +> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +> > > migration/qemu-file-channel.c:78 +> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> > > migration/qemu-file.c:257 +> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> > > address@hidden) at migration/qemu-file.c:510 +> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> > > migration/qemu-file.c:523 +> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> > > migration/qemu-file.c:603 +> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> > > address@hidden) at migration/colo..c:215 +> > > #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +> > > 
checkpoint_request=<synthetic pointer>, f=<optimized out>) at +> > > migration/colo.c:546 +> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> > > migration/colo.c:649 +> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +> > > +> > > +> > > +> > > +> > > +> > > -- +> > > View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> > > Sent from the Developer mailing list archive at Nabble.com. +> > > +> > > +> > > +> > > +> > +> > -- +> > Thanks +> > Zhang Chen +> > +> > +> > +> > +> > +> + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +Is this patch ok? + +I have test it . The test could not hang any more. + + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人: address@hidden address@hidden +抄送人: address@hidden address@hidden address@hidden +日 期 :2017å¹´03月22日 09:11 +主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +> * Hailiang Zhang (address@hidden) wrote: +>> Hi, +>> +>> Thanks for reporting this, and i confirmed it in my test, and it is a bug. 
+>> +>> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +>> case COLO thread/incoming thread is stuck in read/write() while do failover, +>> but it didn't take effect, because all the fd used by COLO (also migration) +>> has been wrapped by qio channel, and it will not call the shutdown API if +>> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +>> +>> Cc: Dr. David Alan Gilbert address@hidden +>> +>> I doubted migration cancel has the same problem, it may be stuck in write() +>> if we tried to cancel migration. +>> +>> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +>> { +>> qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +>> migration_channel_connect(s, ioc, NULL) +>> ... ... +>> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +>> and the +>> migrate_fd_cancel() +>> { +>> ... ... +>> if (s->state == MIGRATION_STATUS_CANCELLING && f) { +>> qemu_file_shutdown(f) --> This will not take effect. No ? +>> } +>> } +> +> (cc'd in Daniel Berrange). +> I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) +at the +> top of qio_channel_socket_new so I think that's safe isn't it? +> + +Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. + +> Dave +> +>> Thanks, +>> Hailiang +>> +>> On 2017/3/21 16:10, address@hidden wrote: +>>> Thank you。 +>>> +>>> I have test aready。 +>>> +>>> When the Primary Node panic,the Secondary Node qemu hang at the same place。 +>>> +>>> Incorrding +http://wiki.qemu-project.org/Features/COLO +,kill Primary Node +qemu will not produce the problem,but Primary Node panic can。 +>>> +>>> I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +>>> +>>> +>>> when failover,channel_shutdown could not shut down the channel. +>>> +>>> +>>> so the colo_process_incoming_thread will hang at recvmsg. 
+>>> +>>> +>>> I test a patch: +>>> +>>> +>>> diff --git a/migration/socket.c b/migration/socket.c +>>> +>>> +>>> index 13966f1..d65a0ea 100644 +>>> +>>> +>>> --- a/migration/socket.c +>>> +>>> +>>> +++ b/migration/socket.c +>>> +>>> +>>> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +>>> +>>> +>>> } +>>> +>>> +>>> +>>> +>>> +>>> trace_migration_socket_incoming_accepted() +>>> +>>> +>>> +>>> +>>> +>>> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +>>> +>>> +>>> + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +>>> +>>> +>>> migration_channel_process_incoming(migrate_get_current(), +>>> +>>> +>>> QIO_CHANNEL(sioc)) +>>> +>>> +>>> object_unref(OBJECT(sioc)) +>>> +>>> +>>> +>>> +>>> My test will not hang any more. +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> 原始邮件 +>>> +>>> +>>> +>>> 发件人: address@hidden +>>> 收件人:王广10165992 address@hidden +>>> 抄送人: address@hidden address@hidden +>>> 日 期 :2017å¹´03月21日 15:58 +>>> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang +>>> +>>> +>>> +>>> +>>> +>>> Hi,Wang. +>>> +>>> You can test this branch: +>>> +>>> +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +>>> +>>> and please follow wiki ensure your own configuration correctly. +>>> +>>> +http://wiki.qemu-project.org/Features/COLO +>>> +>>> +>>> Thanks +>>> +>>> Zhang Chen +>>> +>>> +>>> On 03/21/2017 03:27 PM, address@hidden wrote: +>>> > +>>> > hi. +>>> > +>>> > I test the git qemu master have the same problem. 
+>>> > +>>> > (gdb) bt +>>> > +>>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +>>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +>>> > +>>> > #1 0x00007f658e4aa0c2 in qio_channel_read +>>> > (address@hidden, address@hidden "", +>>> > address@hidden, address@hidden) at io/channel.c:114 +>>> > +>>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +>>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +>>> > migration/qemu-file-channel.c:78 +>>> > +>>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +>>> > migration/qemu-file.c:295 +>>> > +>>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +>>> > address@hidden) at migration/qemu-file.c:555 +>>> > +>>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +>>> > migration/qemu-file.c:568 +>>> > +>>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +>>> > migration/qemu-file.c:648 +>>> > +>>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +>>> > address@hidden) at migration/colo.c:244 +>>> > +>>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +>>> > out>, address@hidden, +>>> > address@hidden) +>>> > +>>> > at migration/colo.c:264 +>>> > +>>> > #9 0x00007f658e3e740e in colo_process_incoming_thread +>>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 +>>> > +>>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +>>> > +>>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +>>> > +>>> > (gdb) p ioc->name +>>> > +>>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +>>> > +>>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +>>> > +>>> > $3 = 0 +>>> > +>>> > +>>> > (gdb) bt +>>> > +>>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +>>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +>>> > +>>> > #1 0x00007fdcc6966350 in g_main_dispatch 
(context=<optimized out>) at +>>> > gmain.c:3054 +>>> > +>>> > #2 g_main_context_dispatch (context=<optimized out>, +>>> > address@hidden) at gmain.c:3630 +>>> > +>>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +>>> > +>>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at +>>> > util/main-loop.c:258 +>>> > +>>> > #5 main_loop_wait (address@hidden) at +>>> > util/main-loop.c:506 +>>> > +>>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +>>> > +>>> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +>>> > out>) at vl.c:4709 +>>> > +>>> > (gdb) p ioc->features +>>> > +>>> > $1 = 6 +>>> > +>>> > (gdb) p ioc->name +>>> > +>>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +>>> > +>>> > +>>> > May be socket_accept_incoming_migration should +>>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +>>> > +>>> > +>>> > thank you. +>>> > +>>> > +>>> > +>>> > +>>> > +>>> > 原始邮件 +>>> > address@hidden +>>> > address@hidden +>>> > address@hidden@huawei.com> +>>> > *日 期 :*2017å¹´03月16日 14:46 +>>> > *主 题 :**Re: [Qemu-devel] COLO failover hang* +>>> > +>>> > +>>> > +>>> > +>>> > On 03/15/2017 05:06 PM, wangguang wrote: +>>> > > am testing QEMU COLO feature described here [QEMU +>>> > > Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +>>> > > +>>> > > When the Primary Node panic,the Secondary Node qemu hang. +>>> > > hang at recvmsg in qio_channel_socket_readv. +>>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": +>>> > > "x-colo-lost-heartbeat" } in Secondary VM's +>>> > > monitor,the Secondary Node qemu still hang at recvmsg . +>>> > > +>>> > > I found that the colo in qemu is not complete yet. +>>> > > Do the colo have any plan for development? +>>> > +>>> > Yes, We are developing. You can see some of patch we pushing. +>>> > +>>> > > Has anyone ever run it successfully? Any help is appreciated! 
+>>> > +>>> > In our internal version can run it successfully, +>>> > The failover detail you can ask Zhanghailiang for help. +>>> > Next time if you have some question about COLO, +>>> > please cc me and zhanghailiang address@hidden +>>> > +>>> > +>>> > Thanks +>>> > Zhang Chen +>>> > +>>> > +>>> > > +>>> > > +>>> > > +>>> > > centos7.2+qemu2.7.50 +>>> > > (gdb) bt +>>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +>>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>, +>>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) +at +>>> > > io/channel-socket.c:497 +>>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +>>> > > address@hidden "", address@hidden, +>>> > > address@hidden) at io/channel.c:97 +>>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +>>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +>>> > > migration/qemu-file-channel.c:78 +>>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +>>> > > migration/qemu-file.c:257 +>>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +>>> > > address@hidden) at migration/qemu-file.c:510 +>>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +>>> > > migration/qemu-file.c:523 +>>> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +>>> > > migration/qemu-file.c:603 +>>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +>>> > > address@hidden) at migration/colo.c:215 +>>> > > #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +>>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at +>>> > > migration/colo.c:546 +>>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +>>> > > migration/colo.c:649 +>>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +>>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +>>> > > +>>> > > +>>> > > 
+>>> > > +>>> > > +>>> > > -- +>>> > > View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +>>> > > Sent from the Developer mailing list archive at Nabble.com. +>>> > > +>>> > > +>>> > > +>>> > > +>>> > +>>> > -- +>>> > Thanks +>>> > Zhang Chen +>>> > +>>> > +>>> > +>>> > +>>> > +>>> +>> +> -- +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> . +> + +Hi, + +On 2017/3/22 9:42, address@hidden wrote: +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +Is this patch ok? +Yes, i think this works, but a better way maybe to call +qio_channel_set_feature() +in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the +socket accept fd, +Or fix it by this: + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index f546c68..ce6894c 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) + { + QIOChannelSocket *cioc; +- +- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); +- cioc->fd = -1; ++ ++ cioc = qio_channel_socket_new(); + cioc->remoteAddrLen = sizeof(ioc->remoteAddr); + cioc->localAddrLen = sizeof(ioc->localAddr); + + +Thanks, +Hailiang +I have test it . The test could not hang any more. 
+ + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人: address@hidden address@hidden +抄送人: address@hidden address@hidden address@hidden +日 期 :2017年03月22日 09:11 +主 题 :Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +> * Hailiang Zhang (address@hidden) wrote: +>> Hi, +>> +>> Thanks for reporting this, and i confirmed it in my test, and it is a bug. +>> +>> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +>> case COLO thread/incoming thread is stuck in read/write() while do failover, +>> but it didn't take effect, because all the fd used by COLO (also migration) +>> has been wrapped by qio channel, and it will not call the shutdown API if +>> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +>> +>> Cc: Dr. David Alan Gilbert address@hidden +>> +>> I doubted migration cancel has the same problem, it may be stuck in write() +>> if we tried to cancel migration. +>> +>> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +>> { +>> qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +>> migration_channel_connect(s, ioc, NULL) +>> ... ... +>> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +>> and the +>> migrate_fd_cancel() +>> { +>> ... ... +>> if (s->state == MIGRATION_STATUS_CANCELLING && f) { +>> qemu_file_shutdown(f) --> This will not take effect. No ? +>> } +>> } +> +> (cc'd in Daniel Berrange). +> I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) +at the +> top of qio_channel_socket_new so I think that's safe isn't it? +> + +Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks.
+ +> Dave +> +>> Thanks, +>> Hailiang +>> +>> On 2017/3/21 16:10, address@hidden wrote: +>>> Thank you。 +>>> +>>> I have test already。 +>>> +>>> When the Primary Node panic,the Secondary Node qemu hang at the same place。 +>>> +>>> According to +http://wiki.qemu-project.org/Features/COLO +,kill Primary Node +qemu will not produce the problem,but Primary Node panic can。 +>>> +>>> I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +>>> +>>> +>>> when failover,channel_shutdown could not shut down the channel. +>>> +>>> +>>> so the colo_process_incoming_thread will hang at recvmsg. +>>> +>>> +>>> I test a patch: +>>> +>>> +>>> diff --git a/migration/socket.c b/migration/socket.c +>>> +>>> +>>> index 13966f1..d65a0ea 100644 +>>> +>>> +>>> --- a/migration/socket.c +>>> +>>> +>>> +++ b/migration/socket.c +>>> +>>> +>>> @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +>>> +>>> +>>> } +>>> +>>> +>>> +>>> +>>> +>>> trace_migration_socket_incoming_accepted() +>>> +>>> +>>> +>>> +>>> +>>> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +>>> +>>> +>>> + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +>>> +>>> +>>> migration_channel_process_incoming(migrate_get_current(), +>>> +>>> +>>> QIO_CHANNEL(sioc)) +>>> +>>> +>>> object_unref(OBJECT(sioc)) +>>> +>>> +>>> +>>> +>>> My test will not hang any more. +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> +>>> 原始邮件 +>>> +>>> +>>> +>>> 发件人: address@hidden +>>> 收件人:王广10165992 address@hidden +>>> 抄送人: address@hidden address@hidden +>>> 日 期 :2017年03月21日 15:58 +>>> 主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang +>>> +>>> +>>> +>>> +>>> +>>> Hi,Wang. +>>> +>>> You can test this branch: +>>> +>>> +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +>>> +>>> and please follow wiki ensure your own configuration correctly.
+>>> +>>> +http://wiki.qemu-project.org/Features/COLO +>>> +>>> +>>> Thanks +>>> +>>> Zhang Chen +>>> +>>> +>>> On 03/21/2017 03:27 PM, address@hidden wrote: +>>> > +>>> > hi. +>>> > +>>> > I test the git qemu master have the same problem. +>>> > +>>> > (gdb) bt +>>> > +>>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +>>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +>>> > +>>> > #1 0x00007f658e4aa0c2 in qio_channel_read +>>> > (address@hidden, address@hidden "", +>>> > address@hidden, address@hidden) at io/channel.c:114 +>>> > +>>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +>>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +>>> > migration/qemu-file-channel.c:78 +>>> > +>>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +>>> > migration/qemu-file.c:295 +>>> > +>>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +>>> > address@hidden) at migration/qemu-file.c:555 +>>> > +>>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +>>> > migration/qemu-file.c:568 +>>> > +>>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +>>> > migration/qemu-file.c:648 +>>> > +>>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +>>> > address@hidden) at migration/colo.c:244 +>>> > +>>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +>>> > out>, address@hidden, +>>> > address@hidden) +>>> > +>>> > at migration/colo.c:264 +>>> > +>>> > #9 0x00007f658e3e740e in colo_process_incoming_thread +>>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 +>>> > +>>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +>>> > +>>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +>>> > +>>> > (gdb) p ioc->name +>>> > +>>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +>>> > +>>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +>>> > +>>> > $3 = 0 
+>>> > +>>> > +>>> > (gdb) bt +>>> > +>>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +>>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +>>> > +>>> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +>>> > gmain.c:3054 +>>> > +>>> > #2 g_main_context_dispatch (context=<optimized out>, +>>> > address@hidden) at gmain.c:3630 +>>> > +>>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +>>> > +>>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at +>>> > util/main-loop.c:258 +>>> > +>>> > #5 main_loop_wait (address@hidden) at +>>> > util/main-loop.c:506 +>>> > +>>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +>>> > +>>> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +>>> > out>) at vl.c:4709 +>>> > +>>> > (gdb) p ioc->features +>>> > +>>> > $1 = 6 +>>> > +>>> > (gdb) p ioc->name +>>> > +>>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +>>> > +>>> > +>>> > May be socket_accept_incoming_migration should +>>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +>>> > +>>> > +>>> > thank you. +>>> > +>>> > +>>> > +>>> > +>>> > +>>> > 原始邮件 +>>> > address@hidden +>>> > address@hidden +>>> > address@hidden@huawei.com> +>>> > *日 期 :*2017年03月16日 14:46 +>>> > *主 题 :**Re: [Qemu-devel] COLO failover hang* +>>> > +>>> > +>>> > +>>> > +>>> > On 03/15/2017 05:06 PM, wangguang wrote: +>>> > > am testing QEMU COLO feature described here [QEMU +>>> > > Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +>>> > > +>>> > > When the Primary Node panic,the Secondary Node qemu hang. +>>> > > hang at recvmsg in qio_channel_socket_readv. +>>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": +>>> > > "x-colo-lost-heartbeat" } in Secondary VM's +>>> > > monitor,the Secondary Node qemu still hang at recvmsg . +>>> > > +>>> > > I found that the colo in qemu is not complete yet. +>>> > > Do the colo have any plan for development?
+>>> > +>>> > Yes, We are developing. You can see some of patch we pushing. +>>> > +>>> > > Has anyone ever run it successfully? Any help is appreciated! +>>> > +>>> > In our internal version can run it successfully, +>>> > The failover detail you can ask Zhanghailiang for help. +>>> > Next time if you have some question about COLO, +>>> > please cc me and zhanghailiang address@hidden +>>> > +>>> > +>>> > Thanks +>>> > Zhang Chen +>>> > +>>> > +>>> > > +>>> > > +>>> > > +>>> > > centos7.2+qemu2.7.50 +>>> > > (gdb) bt +>>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +>>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>, +>>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) +at +>>> > > io/channel-socket.c:497 +>>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +>>> > > address@hidden "", address@hidden, +>>> > > address@hidden) at io/channel.c:97 +>>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +>>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +>>> > > migration/qemu-file-channel.c:78 +>>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +>>> > > migration/qemu-file.c:257 +>>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +>>> > > address@hidden) at migration/qemu-file.c:510 +>>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +>>> > > migration/qemu-file.c:523 +>>> > > #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +>>> > > migration/qemu-file.c:603 +>>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +>>> > > address@hidden) at migration/colo.c:215 +>>> > > #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +>>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at +>>> > > migration/colo.c:546 +>>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +>>> > > migration/colo.c:649 +>>> > > #11 
0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +>>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +>>> > > +>>> > > +>>> > > +>>> > > +>>> > > +>>> > > -- +>>> > > View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +>>> > > Sent from the Developer mailing list archive at Nabble.com. +>>> > > +>>> > > +>>> > > +>>> > > +>>> > +>>> > -- +>>> > Thanks +>>> > Zhang Chen +>>> > +>>> > +>>> > +>>> > +>>> > +>>> +>> +> -- +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> . +> + diff --git a/results/classifier/zero-shot/014/peripherals/60339453 b/results/classifier/zero-shot/014/peripherals/60339453 new file mode 100644 index 00000000..a0503d6c --- /dev/null +++ b/results/classifier/zero-shot/014/peripherals/60339453 @@ -0,0 +1,88 @@ +peripherals: 0.824 +kernel: 0.805 +register: 0.787 +boot: 0.782 +arm: 0.780 +performance: 0.764 +permissions: 0.750 +TCG: 0.748 +alpha: 0.718 +VMM: 0.712 +risc-v: 0.707 +device: 0.706 +mistranslation: 0.699 +hypervisor: 0.697 +PID: 0.685 +network: 0.682 +vnc: 0.680 +debug: 0.672 +graphic: 0.671 +operating system: 0.670 +KVM: 0.669 +user-level: 0.663 +semantic: 0.662 +architecture: 0.649 +x86: 0.647 +virtual: 0.630 +files: 0.623 +ppc: 0.615 +socket: 0.607 +i386: 0.533 +assembly: 0.486 + +[BUG] scsi: vmw_pvscsi: Boot hangs during scsi under qemu, post commit e662502b3a78 + +Hi, + +Commit e662502b3a78 ("scsi: vmw_pvscsi: Set correct residual data length"), +and its backports to stable trees, makes kernel hang during boot, when +ran as a VM under qemu with following parameters: + + -drive file=$DISKFILE,if=none,id=sda + -device pvscsi + -device scsi-hd,bus=scsi.0,drive=sda + +Diving deeper, commit e662502b3a78 + + @@ -585,7 +585,13 @@ static void pvscsi_complete_request(struct +pvscsi_adapter *adapter, + case BTSTAT_SUCCESS: + + /* + + * Commands like INQUIRY may transfer less data than + + * requested by the initiator via bufflen. 
Set residual + + * count to make upper layer aware of the actual amount + + * of data returned. + + */ + + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); + +assumes 'e->dataLen' is properly armed with actual num of bytes +transferred; alas qemu's hw/scsi/vmw_pvscsi.c never arms the 'dataLen' +field of the completion descriptor (kept zero). + +As a result, the residual count is set as the *entire* 'scsi_bufflen' of a +good transfer, which makes upper scsi layers repeatedly ignore this +valid transfer. + +Not properly arming 'dataLen' seems as an oversight in qemu, which needs +to be fixed. + +However, since kernels with commit e662502b3a78 (and backports) now fail +to boot under qemu's "-device pvscsi", a suggested workaround is to set +the residual count *only* if 'e->dataLen' is armed, e.g: + + @@ -588,7 +588,8 @@ static void pvscsi_complete_request(struct pvscsi_adapter +*adapter, + * count to make upper layer aware of the actual +amount + * of data returned. + */ + - scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); + + if (e->dataLen) + + scsi_set_resid(cmd, scsi_bufflen(cmd) - +e->dataLen); + +in order to make kernels boot on old qemu binaries. + +Best, +Shmulik + -- cgit 1.4.1