diff options
| author | Christian Krinitsin <mail@krinitsin.com> | 2025-07-03 19:39:53 +0200 |
|---|---|---|
| committer | Christian Krinitsin <mail@krinitsin.com> | 2025-07-03 19:39:53 +0200 |
| commit | dee4dcba78baf712cab403d47d9db319ab7f95d6 (patch) | |
| tree | 418478faf06786701a56268672f73d6b0b4eb239 /results/classifier/014/peripherals | |
| parent | 4d9e26c0333abd39bdbd039dcdb30ed429c475ba (diff) | |
| download | emulator-bug-study-dee4dcba78baf712cab403d47d9db319ab7f95d6.tar.gz emulator-bug-study-dee4dcba78baf712cab403d47d9db319ab7f95d6.zip | |
restructure results
Diffstat (limited to 'results/classifier/014/peripherals')
| -rw-r--r-- | results/classifier/014/peripherals/56937788 | 371 | ||||
| -rw-r--r-- | results/classifier/014/peripherals/57756589 | 1448 | ||||
| -rw-r--r-- | results/classifier/014/peripherals/60339453 | 88 |
3 files changed, 0 insertions, 1907 deletions
diff --git a/results/classifier/014/peripherals/56937788 b/results/classifier/014/peripherals/56937788 deleted file mode 100644 index 82617706..00000000 --- a/results/classifier/014/peripherals/56937788 +++ /dev/null @@ -1,371 +0,0 @@ -peripherals: 0.807 -user-level: 0.794 -risc-v: 0.773 -hypervisor: 0.765 -TCG: 0.760 -KVM: 0.755 -vnc: 0.743 -mistranslation: 0.735 -VMM: 0.731 -virtual: 0.730 -ppc: 0.728 -debug: 0.723 -graphic: 0.720 -operating system: 0.713 -register: 0.706 -semantic: 0.705 -device: 0.697 -i386: 0.694 -x86: 0.693 -performance: 0.692 -permissions: 0.685 -files: 0.680 -arm: 0.665 -assembly: 0.638 -boot: 0.636 -network: 0.633 -alpha: 0.631 -architecture: 0.627 -PID: 0.620 -socket: 0.613 -kernel: 0.594 - -[Qemu-devel] [Bug] virtio-blk: qemu will crash if hotplug virtio-blk device failed - -I found that hotplug virtio-blk device will lead to qemu crash. - -Re-production steps: - -1. Run VM named vm001 - -2. Create a virtio-blk.xml which contains wrong configurations: -<disk device="lun" rawio="yes" type="block"> - <driver cache="none" io="native" name="qemu" type="raw" /> - <source dev="/dev/mapper/11-dm" /> - <target bus="virtio" dev="vdx" /> -</disk> - -3. Run command : virsh attach-device vm001 vm001 - -Libvirt will return err msg: - -error: Failed to attach device from blk-scsi.xml - -error: internal error: unable to execute QEMU command 'device_add': Please set -scsi=off for virtio-blk devices in order to use virtio 1.0 - -it means hotplug virtio-blk device failed. - -4. 
Suspend or shutdown VM will leads to qemu crash - - - -from gdb: - - -(gdb) bt -#0 object_get_class (address@hidden) at qom/object.c:750 -#1 0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, -running=0, state=<optimized out>) at -/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203 -#2 0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at -vl.c:1685 -#3 0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at -/mnt/sdb/lzc/code/open/qemu/cpus.c:941 -#4 vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807 -#5 0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102 -#6 0x00007f9a7262c70a in qmp_marshal_stop (args=<optimized out>, -ret=<optimized out>, errp=0x7ffe63e255d8) at qmp-marshal.c:5854 -#7 0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, -request=0x7f9a76510120, cmds=0x7f9a72ee7980 <qmp_commands>) at -qapi/qmp-dispatch.c:104 -#8 qmp_dispatch (cmds=0x7f9a72ee7980 <qmp_commands>, address@hidden) at -qapi/qmp-dispatch.c:131 -#9 0x00007f9a725288d5 in handle_qmp_command (parser=<optimized out>, -tokens=<optimized out>) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852 -#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, -input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at -qobject/json-streamer.c:105 -#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', -address@hidden) at qobject/json-lexer.c:323 -#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, -buffer=<optimized out>, size=<optimized out>) at qobject/json-lexer.c:373 -#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=<optimized out>, -buffer=<optimized out>, size=<optimized out>) at qobject/json-streamer.c:124 -#14 0x00007f9a7252722e in monitor_qmp_read (opaque=<optimized out>, -buf=<optimized out>, size=<optimized out>) at -/mnt/sdb/lzc/code/open/qemu/monitor.c:3894 -#15 0x00007f9a7284ee1b in tcp_chr_read (chan=<optimized out>, cond=<optimized -out>, opaque=<optimized 
out>) at chardev/char-socket.c:441 -#16 0x00007f9a6e03e99a in g_main_context_dispatch () from -/usr/lib64/libglib-2.0.so.0 -#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214 -#18 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261 -#19 main_loop_wait (address@hidden) at util/main-loop.c:515 -#20 0x00007f9a724e7547 in main_loop () at vl.c:1999 -#21 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at -vl.c:4877 - -Problem happens in virtio_vmstate_change which is called by vm_state_notify, -static void virtio_vmstate_change(void *opaque, int running, RunState state) -{ - VirtIODevice *vdev = opaque; - BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); - vdev->vm_running = running; - - if (backend_run) { - virtio_set_status(vdev, vdev->status); - } - - if (k->vmstate_change) { - k->vmstate_change(qbus->parent, backend_run); - } - - if (!backend_run) { - virtio_set_status(vdev, vdev->status); - } -} - -Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. -virtio_vmstate_change is added to the list vm_change_state_head at -virtio_blk_device_realize(virtio_init), -but after hotplug virtio-blk failed, virtio_vmstate_change will not be removed -from vm_change_state_head. - - -I apply a patch as follews: - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 5884ce3..ea532dc 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, Error -**errp) - virtio_bus_device_plugged(vdev, &err); - if (err != NULL) { - error_propagate(errp, err); -+ vdc->unrealize(dev, NULL); - return; - } - -On Tue, Oct 31, 2017 at 05:19:08AM +0000, linzhecheng wrote: -> -I found that hotplug virtio-blk device will lead to qemu crash. -The author posted a patch in a separate email thread. 
Please see -"[PATCH] fix: unrealize virtio device if we fail to hotplug it". - -> -Re-production steps: -> -> -1. Run VM named vm001 -> -> -2. Create a virtio-blk.xml which contains wrong configurations: -> -<disk device="lun" rawio="yes" type="block"> -> -<driver cache="none" io="native" name="qemu" type="raw" /> -> -<source dev="/dev/mapper/11-dm" /> -> -<target bus="virtio" dev="vdx" /> -> -</disk> -> -> -3. Run command : virsh attach-device vm001 vm001 -> -> -Libvirt will return err msg: -> -> -error: Failed to attach device from blk-scsi.xml -> -> -error: internal error: unable to execute QEMU command 'device_add': Please -> -set scsi=off for virtio-blk devices in order to use virtio 1.0 -> -> -it means hotplug virtio-blk device failed. -> -> -4. Suspend or shutdown VM will leads to qemu crash -> -> -> -> -from gdb: -> -> -> -(gdb) bt -> -#0 object_get_class (address@hidden) at qom/object.c:750 -> -#1 0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, -> -running=0, state=<optimized out>) at -> -/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203 -> -#2 0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at -> -vl.c:1685 -> -#3 0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at -> -/mnt/sdb/lzc/code/open/qemu/cpus.c:941 -> -#4 vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807 -> -#5 0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102 -> -#6 0x00007f9a7262c70a in qmp_marshal_stop (args=<optimized out>, -> -ret=<optimized out>, errp=0x7ffe63e255d8) at qmp-marshal.c:5854 -> -#7 0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, -> -request=0x7f9a76510120, cmds=0x7f9a72ee7980 <qmp_commands>) at -> -qapi/qmp-dispatch.c:104 -> -#8 qmp_dispatch (cmds=0x7f9a72ee7980 <qmp_commands>, address@hidden) at -> -qapi/qmp-dispatch.c:131 -> -#9 0x00007f9a725288d5 in handle_qmp_command (parser=<optimized out>, -> -tokens=<optimized out>) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852 -> -#10 
0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, -> -input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at -> -qobject/json-streamer.c:105 -> -#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', -> -address@hidden) at qobject/json-lexer.c:323 -> -#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, -> -buffer=<optimized out>, size=<optimized out>) at qobject/json-lexer.c:373 -> -#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=<optimized out>, -> -buffer=<optimized out>, size=<optimized out>) at qobject/json-streamer.c:124 -> -#14 0x00007f9a7252722e in monitor_qmp_read (opaque=<optimized out>, -> -buf=<optimized out>, size=<optimized out>) at -> -/mnt/sdb/lzc/code/open/qemu/monitor.c:3894 -> -#15 0x00007f9a7284ee1b in tcp_chr_read (chan=<optimized out>, cond=<optimized -> -out>, opaque=<optimized out>) at chardev/char-socket.c:441 -> -#16 0x00007f9a6e03e99a in g_main_context_dispatch () from -> -/usr/lib64/libglib-2.0.so.0 -> -#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214 -> -#18 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261 -> -#19 main_loop_wait (address@hidden) at util/main-loop.c:515 -> -#20 0x00007f9a724e7547 in main_loop () at vl.c:1999 -> -#21 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) -> -at vl.c:4877 -> -> -Problem happens in virtio_vmstate_change which is called by vm_state_notify, -> -static void virtio_vmstate_change(void *opaque, int running, RunState state) -> -{ -> -VirtIODevice *vdev = opaque; -> -BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); -> -VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -> -bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); -> -vdev->vm_running = running; -> -> -if (backend_run) { -> -virtio_set_status(vdev, vdev->status); -> -} -> -> -if (k->vmstate_change) { -> -k->vmstate_change(qbus->parent, backend_run); -> -} -> -> -if (!backend_run) { -> 
-virtio_set_status(vdev, vdev->status); -> -} -> -} -> -> -Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. -> -virtio_vmstate_change is added to the list vm_change_state_head at -> -virtio_blk_device_realize(virtio_init), -> -but after hotplug virtio-blk failed, virtio_vmstate_change will not be -> -removed from vm_change_state_head. -> -> -> -I apply a patch as follews: -> -> -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -> -index 5884ce3..ea532dc 100644 -> ---- a/hw/virtio/virtio.c -> -+++ b/hw/virtio/virtio.c -> -@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, -> -Error **errp) -> -virtio_bus_device_plugged(vdev, &err); -> -if (err != NULL) { -> -error_propagate(errp, err); -> -+ vdc->unrealize(dev, NULL); -> -return; -> -} -signature.asc -Description: -PGP signature - diff --git a/results/classifier/014/peripherals/57756589 b/results/classifier/014/peripherals/57756589 deleted file mode 100644 index 5891931d..00000000 --- a/results/classifier/014/peripherals/57756589 +++ /dev/null @@ -1,1448 +0,0 @@ -peripherals: 0.875 -hypervisor: 0.863 -mistranslation: 0.861 -register: 0.858 -architecture: 0.856 -device: 0.853 -vnc: 0.851 -virtual: 0.845 -permissions: 0.842 -assembly: 0.841 -performance: 0.839 -ppc: 0.838 -semantic: 0.835 -operating system: 0.835 -TCG: 0.833 -VMM: 0.833 -arm: 0.828 -boot: 0.827 -user-level: 0.826 -graphic: 0.824 -network: 0.822 -socket: 0.820 -PID: 0.819 -KVM: 0.817 -kernel: 0.817 -files: 0.816 -x86: 0.814 -alpha: 0.810 -debug: 0.803 -i386: 0.782 -risc-v: 0.755 - -[Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang - -amost like wikiï¼but panic in Primary Node. - - - - -setp: - -1 - -Primary Node. 
- -x86_64-softmmu/qemu-system-x86_64 -enable-kvm -boot c -m 2048 -smp 2 -qmp stdio --vnc :7 -name primary -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb --usbdevice tablet\ - - -drive -if=virtio,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1, - - -children.0.file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,children.0.driver=qcow2 - -S \ - - -netdev -tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \ - - -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \ - - -netdev -tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \ - - -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 \ - - -chardev socket,id=mirror0,host=9.61.1.8,port=9003,server,nowait -chardev -socket,id=compare1,host=9.61.1.8,port=9004,server,nowait \ - - -chardev socket,id=compare0,host=9.61.1.8,port=9001,server,nowait -chardev -socket,id=compare0-0,host=9.61.1.8,port=9001 \ - - -chardev socket,id=compare_out,host=9.61.1.8,port=9005,server,nowait \ - - -chardev socket,id=compare_out0,host=9.61.1.8,port=9005 \ - - -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \ - - -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out --object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \ - - -object -colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 - -2 Second node: - -x86_64-softmmu/qemu-system-x86_64 -boot c -m 2048 -smp 2 -qmp stdio -vnc :7 --name secondary -enable-kvm -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb --usbdevice tablet\ - - -drive -if=none,id=colo-disk0,file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,driver=qcow2,node-name=node0 - \ - - -drive -if=virtio,id=active-disk0,driver=replication,mode=secondary,file.driver=qcow2,top-id=active-disk0,file.file.filename=/mnt/ramfstest/active_disk.img,file.backing.driver=qcow2,file.backing.file.filename=/mnt/ramfstest/hidden_disk.img,file.backing.backing=colo-disk0 - \ - 
- -netdev -tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \ - - -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \ - - -netdev -tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \ - - -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 -chardev -socket,id=red0,host=9.61.1.8,port=9003 \ - - -chardev socket,id=red1,host=9.61.1.8,port=9004 \ - - -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \ - - -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \ - - -object filter-rewriter,id=rew0,netdev=hn0,queue=all -incoming tcp:0:8888 - -3 Secondary node: - -{'execute':'qmp_capabilities'} - -{ 'execute': 'nbd-server-start', - - 'arguments': {'addr': {'type': 'inet', 'data': {'host': '9.61.1.7', 'port': -'8889'} } } - -} - -{'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk0', 'writable': -true } } - -4:Primary Node: - -{'execute':'qmp_capabilities'} - - -{ 'execute': 'human-monitor-command', - - 'arguments': {'command-line': 'drive_add -n buddy -driver=replication,mode=primary,file.driver=nbd,file.host=9.61.1.7,file.port=8889,file.export=colo-disk0,node-name=node0'}} - -{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': -'node0' } } - -{ 'execute': 'migrate-set-capabilities', - - 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } -] } } - -{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:9.61.1.7:8888' } } - - - - -then can see two running VMs, whenever you make changes to PVM, SVM will be -synced. - - - - -5:Primary Node: - -echo c > /proc/sysrq-trigger - - - - -6:Secondary node: - -{ 'execute': 'nbd-server-stop' } - -{ "execute": "x-colo-lost-heartbeat" } - - - - -then can see the Secondary node hang at recvmsg recvmsg . 
- - - - - - - - - - - - -åå§é®ä»¶ - - - -åä»¶äººï¼ address@hidden -æ¶ä»¶äººï¼ç广10165992 address@hidden -æéäººï¼ address@hidden address@hidden -æ¥ æ ï¼2017å¹´03æ21æ¥ 16:27 -主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: [BUG]COLO failover hang - - - - - -Hi, - -On 2017/3/21 16:10, address@hidden wrote: -ï¼ Thank youã -ï¼ -ï¼ I have test areadyã -ï¼ -ï¼ When the Primary Node panic,the Secondary Node qemu hang at the same placeã -ï¼ -ï¼ Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary Node qemu -will not produce the problem,but Primary Node panic canã -ï¼ -ï¼ I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. -ï¼ -ï¼ - -Yes, you are right, when we do failover for primary/secondary VM, we will -shutdown the related -fd in case it is stuck in the read/write fd. - -It seems that you didn't follow the above introduction exactly to do the test. -Could you -share your test procedures ? Especially the commands used in the test. - -Thanks, -Hailiang - -ï¼ when failover,channel_shutdown could not shut down the channel. -ï¼ -ï¼ -ï¼ so the colo_process_incoming_thread will hang at recvmsg. -ï¼ -ï¼ -ï¼ I test a patch: -ï¼ -ï¼ -ï¼ diff --git a/migration/socket.c b/migration/socket.c -ï¼ -ï¼ -ï¼ index 13966f1..d65a0ea 100644 -ï¼ -ï¼ -ï¼ --- a/migration/socket.c -ï¼ -ï¼ -ï¼ +++ b/migration/socket.c -ï¼ -ï¼ -ï¼ @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -ï¼ -ï¼ -ï¼ } -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ trace_migration_socket_incoming_accepted() -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") -ï¼ -ï¼ -ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) -ï¼ -ï¼ -ï¼ migration_channel_process_incoming(migrate_get_current(), -ï¼ -ï¼ -ï¼ QIO_CHANNEL(sioc)) -ï¼ -ï¼ -ï¼ object_unref(OBJECT(sioc)) -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ My test will not hang any more. 
-ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ åå§é®ä»¶ -ï¼ -ï¼ -ï¼ -ï¼ åä»¶äººï¼ address@hidden -ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden -ï¼ æéäººï¼ address@hidden address@hidden -ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -ï¼ ä¸» é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ Hi,Wang. -ï¼ -ï¼ You can test this branch: -ï¼ -ï¼ -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -ï¼ -ï¼ and please follow wiki ensure your own configuration correctly. -ï¼ -ï¼ -http://wiki.qemu-project.org/Features/COLO -ï¼ -ï¼ -ï¼ Thanks -ï¼ -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ ï¼ -ï¼ ï¼ hi. -ï¼ ï¼ -ï¼ ï¼ I test the git qemu master have the same problem. -ï¼ ï¼ -ï¼ ï¼ (gdb) bt -ï¼ ï¼ -ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ ï¼ -ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ ï¼ (address@hidden, address@hidden "", -ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ ï¼ -ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ ï¼ -ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ ï¼ migration/qemu-file.c:295 -ï¼ ï¼ -ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ ï¼ -ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:568 -ï¼ ï¼ -ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:648 -ï¼ ï¼ -ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ ï¼ address@hidden) at migration/colo.c:244 -ï¼ ï¼ -ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ ï¼ outï¼, address@hidden, -ï¼ ï¼ address@hidden) 
-ï¼ ï¼ -ï¼ ï¼ at migration/colo.c:264 -ï¼ ï¼ -ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ ï¼ -ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ ï¼ -ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ ï¼ -ï¼ ï¼ (gdb) p ioc-ï¼name -ï¼ ï¼ -ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ ï¼ -ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ ï¼ -ï¼ ï¼ $3 = 0 -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ (gdb) bt -ï¼ ï¼ -ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ ï¼ -ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ ï¼ gmain.c:3054 -ï¼ ï¼ -ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ ï¼ address@hidden) at gmain.c:3630 -ï¼ ï¼ -ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ ï¼ -ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ ï¼ util/main-loop.c:258 -ï¼ ï¼ -ï¼ ï¼ #5 main_loop_wait (address@hidden) at -ï¼ ï¼ util/main-loop.c:506 -ï¼ ï¼ -ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ ï¼ -ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ ï¼ outï¼) at vl.c:4709 -ï¼ ï¼ -ï¼ ï¼ (gdb) p ioc-ï¼features -ï¼ ï¼ -ï¼ ï¼ $1 = 6 -ï¼ ï¼ -ï¼ ï¼ (gdb) p ioc-ï¼name -ï¼ ï¼ -ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ May be socket_accept_incoming_migration should -ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ thank you. 
-ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ åå§é®ä»¶ -ï¼ ï¼ address@hidden -ï¼ ï¼ address@hidden -ï¼ ï¼ address@hidden@huawei.comï¼ -ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ ï¼ ï¼ Do the colo have any plan for development? -ï¼ ï¼ -ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ ï¼ -ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -ï¼ ï¼ -ï¼ ï¼ In our internal version can run it successfully, -ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. 
-ï¼ ï¼ Next time if you have some question about COLO, -ï¼ ï¼ please cc me and zhanghailiang address@hidden -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ Thanks -ï¼ ï¼ Zhang Chen -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ ï¼ ï¼ (gdb) bt -ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at -ï¼ ï¼ ï¼ io/channel-socket.c:497 -ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ ï¼ ï¼ address@hidden "", address@hidden, -ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ ï¼ ï¼ migration/qemu-file.c:257 -ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ ï¼ ï¼ migration/qemu-file.c:523 -ï¼ ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ ï¼ ï¼ migration/qemu-file.c:603 -ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ ï¼ ï¼ address@hidden) at migration/colo..c:215 -ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ ï¼ ï¼ migration/colo.c:546 -ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ ï¼ ï¼ migration/colo.c:649 -ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -- -ï¼ ï¼ ï¼ View this message in context: 
-http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -- -ï¼ ï¼ Thanks -ï¼ ï¼ Zhang Chen -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ - -diff --git a/migration/socket.c b/migration/socket.c - - -index 13966f1..d65a0ea 100644 - - ---- a/migration/socket.c - - -+++ b/migration/socket.c - - -@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel -*ioc, - - - } - - - - - - trace_migration_socket_incoming_accepted() - - - - - - qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") - - -+ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) - - - migration_channel_process_incoming(migrate_get_current(), - - - QIO_CHANNEL(sioc)) - - - object_unref(OBJECT(sioc)) - - - - -Is this patch ok? - -I have test it . The test could not hang any more. - - - - - - - - - - - - -åå§é®ä»¶ - - - -åä»¶äººï¼ address@hidden -æ¶ä»¶äººï¼ address@hidden address@hidden -æéäººï¼ address@hidden address@hidden address@hidden -æ¥ æ ï¼2017å¹´03æ22æ¥ 09:11 -主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: [BUG]COLO failover hang - - - - - -On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: -ï¼ * Hailiang Zhang (address@hidden) wrote: -ï¼ï¼ Hi, -ï¼ï¼ -ï¼ï¼ Thanks for reporting this, and i confirmed it in my test, and it is a bug. -ï¼ï¼ -ï¼ï¼ Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -ï¼ï¼ case COLO thread/incoming thread is stuck in read/write() while do failover, -ï¼ï¼ but it didn't take effect, because all the fd used by COLO (also migration) -ï¼ï¼ has been wrapped by qio channel, and it will not call the shutdown API if -ï¼ï¼ we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN). -ï¼ï¼ -ï¼ï¼ Cc: Dr. David Alan Gilbert address@hidden -ï¼ï¼ -ï¼ï¼ I doubted migration cancel has the same problem, it may be stuck in write() -ï¼ï¼ if we tried to cancel migration. 
-ï¼ï¼ -ï¼ï¼ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, -Error **errp) -ï¼ï¼ { -ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") -ï¼ï¼ migration_channel_connect(s, ioc, NULL) -ï¼ï¼ ... ... -ï¼ï¼ We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -ï¼ï¼ and the -ï¼ï¼ migrate_fd_cancel() -ï¼ï¼ { -ï¼ï¼ ... ... -ï¼ï¼ if (s-ï¼state == MIGRATION_STATUS_CANCELLING && f) { -ï¼ï¼ qemu_file_shutdown(f) --ï¼ This will not take effect. No ? -ï¼ï¼ } -ï¼ï¼ } -ï¼ -ï¼ (cc'd in Daniel Berrange). -ï¼ I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) -at the -ï¼ top of qio_channel_socket_new so I think that's safe isn't it? -ï¼ - -Hmm, you are right, this problem is only exist for the migration incoming fd, -thanks. - -ï¼ Dave -ï¼ -ï¼ï¼ Thanks, -ï¼ï¼ Hailiang -ï¼ï¼ -ï¼ï¼ On 2017/3/21 16:10, address@hidden wrote: -ï¼ï¼ï¼ Thank youã -ï¼ï¼ï¼ -ï¼ï¼ï¼ I have test areadyã -ï¼ï¼ï¼ -ï¼ï¼ï¼ When the Primary Node panic,the Secondary Node qemu hang at the same placeã -ï¼ï¼ï¼ -ï¼ï¼ï¼ Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary Node -qemu will not produce the problem,but Primary Node panic canã -ï¼ï¼ï¼ -ï¼ï¼ï¼ I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ when failover,channel_shutdown could not shut down the channel. -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ so the colo_process_incoming_thread will hang at recvmsg. 
-ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ I test a patch: -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ diff --git a/migration/socket.c b/migration/socket.c -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ index 13966f1..d65a0ea 100644 -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ --- a/migration/socket.c -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ +++ b/migration/socket.c -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ } -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ trace_migration_socket_incoming_accepted() -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ migration_channel_process_incoming(migrate_get_current(), -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ QIO_CHANNEL(sioc)) -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ object_unref(OBJECT(sioc)) -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ My test will not hang any more. -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ åå§é®ä»¶ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ åä»¶äººï¼ address@hidden -ï¼ï¼ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden -ï¼ï¼ï¼ æéäººï¼ address@hidden address@hidden -ï¼ï¼ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -ï¼ï¼ï¼ 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ Hi,Wang. -ï¼ï¼ï¼ -ï¼ï¼ï¼ You can test this branch: -ï¼ï¼ï¼ -ï¼ï¼ï¼ -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -ï¼ï¼ï¼ -ï¼ï¼ï¼ and please follow wiki ensure your own configuration correctly. -ï¼ï¼ï¼ -ï¼ï¼ï¼ -http://wiki.qemu-project.org/Features/COLO -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ Thanks -ï¼ï¼ï¼ -ï¼ï¼ï¼ Zhang Chen -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ hi. -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ I test the git qemu master have the same problem. 
-ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) bt -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ï¼ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ï¼ï¼ ï¼ (address@hidden, address@hidden "", -ï¼ï¼ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ï¼ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ï¼ï¼ ï¼ migration/qemu-file.c:295 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ï¼ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ï¼ï¼ ï¼ migration/qemu-file.c:568 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ï¼ï¼ ï¼ migration/qemu-file.c:648 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ï¼ï¼ ï¼ address@hidden) at migration/colo.c:244 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ï¼ï¼ ï¼ outï¼, address@hidden, -ï¼ï¼ï¼ ï¼ address@hidden) -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ at migration/colo.c:264 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ï¼ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $3 = 0 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ 
(gdb) bt -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ï¼ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ï¼ï¼ ï¼ gmain.c:3054 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ address@hidden) at gmain.c:3630 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ï¼ï¼ ï¼ util/main-loop.c:258 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #5 main_loop_wait (address@hidden) at -ï¼ï¼ï¼ ï¼ util/main-loop.c:506 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ï¼ï¼ ï¼ outï¼) at vl.c:4709 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $1 = 6 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ May be socket_accept_incoming_migration should -ï¼ï¼ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ thank you. -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ åå§é®ä»¶ -ï¼ï¼ï¼ ï¼ address@hidden -ï¼ï¼ï¼ ï¼ address@hidden -ï¼ï¼ï¼ ï¼ address@hidden@huawei.comï¼ -ï¼ï¼ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ï¼ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ï¼ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ï¼ï¼ ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ï¼ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. 
-ï¼ï¼ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ï¼ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ï¼ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ï¼ï¼ ï¼ ï¼ Do the colo have any plan for development? -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ In our internal version can run it successfully, -ï¼ï¼ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. -ï¼ï¼ï¼ ï¼ Next time if you have some question about COLO, -ï¼ï¼ï¼ ï¼ please cc me and zhanghailiang address@hidden -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ Thanks -ï¼ï¼ï¼ ï¼ Zhang Chen -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ï¼ï¼ ï¼ ï¼ (gdb) bt -ï¼ï¼ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ï¼ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) -at -ï¼ï¼ï¼ ï¼ ï¼ io/channel-socket.c:497 -ï¼ï¼ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden "", address@hidden, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ï¼ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ï¼ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:257 -ï¼ï¼ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ï¼ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:523 -ï¼ï¼ï¼ ï¼ ï¼ 
#7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:603 -ï¼ï¼ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/colo.c:215 -ï¼ï¼ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ï¼ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:546 -ï¼ï¼ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:649 -ï¼ï¼ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ï¼ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -- -ï¼ï¼ï¼ ï¼ ï¼ View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ï¼ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -- -ï¼ï¼ï¼ ï¼ Thanks -ï¼ï¼ï¼ ï¼ Zhang Chen -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ -ï¼ï¼ -ï¼ -- -ï¼ Dr. David Alan Gilbert / address@hidden / Manchester, UK -ï¼ -ï¼ . -ï¼ - -Hi, - -On 2017/3/22 9:42, address@hidden wrote: -diff --git a/migration/socket.c b/migration/socket.c - - -index 13966f1..d65a0ea 100644 - - ---- a/migration/socket.c - - -+++ b/migration/socket.c - - -@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel -*ioc, - - - } - - - - - - trace_migration_socket_incoming_accepted() - - - - - - qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") - - -+ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) - - - migration_channel_process_incoming(migrate_get_current(), - - - QIO_CHANNEL(sioc)) - - - object_unref(OBJECT(sioc)) - - - - -Is this patch ok? 
-Yes, i think this works, but a better way maybe to call -qio_channel_set_feature() -in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the -socket accept fd, -Or fix it by this: - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index f546c68..ce6894c 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, - Error **errp) - { - QIOChannelSocket *cioc; -- -- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); -- cioc->fd = -1; -+ -+ cioc = qio_channel_socket_new(); - cioc->remoteAddrLen = sizeof(ioc->remoteAddr); - cioc->localAddrLen = sizeof(ioc->localAddr); - - -Thanks, -Hailiang -I have test it . The test could not hang any more. - - - - - - - - - - - - -åå§é®ä»¶ - - - -åä»¶äººï¼ address@hidden -æ¶ä»¶äººï¼ address@hidden address@hidden -æéäººï¼ address@hidden address@hidden address@hidden -æ¥ æ ï¼2017å¹´03æ22æ¥ 09:11 -主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: [BUG]COLO failover hang - - - - - -On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: -ï¼ * Hailiang Zhang (address@hidden) wrote: -ï¼ï¼ Hi, -ï¼ï¼ -ï¼ï¼ Thanks for reporting this, and i confirmed it in my test, and it is a bug. -ï¼ï¼ -ï¼ï¼ Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -ï¼ï¼ case COLO thread/incoming thread is stuck in read/write() while do failover, -ï¼ï¼ but it didn't take effect, because all the fd used by COLO (also migration) -ï¼ï¼ has been wrapped by qio channel, and it will not call the shutdown API if -ï¼ï¼ we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN). -ï¼ï¼ -ï¼ï¼ Cc: Dr. David Alan Gilbert address@hidden -ï¼ï¼ -ï¼ï¼ I doubted migration cancel has the same problem, it may be stuck in write() -ï¼ï¼ if we tried to cancel migration. 
-＞＞ -＞＞ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, -Error **errp) -＞＞ { -＞＞ qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") -＞＞ migration_channel_connect(s, ioc, NULL) -＞＞ ... ... -＞＞ We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -＞＞ and the -＞＞ migrate_fd_cancel() -＞＞ { -＞＞ ... ... -＞＞ if (s-＞state == MIGRATION_STATUS_CANCELLING && f) { -＞＞ qemu_file_shutdown(f) --＞ This will not take effect. No ? -＞＞ } -＞＞ } -＞ -＞ (cc'd in Daniel Berrange). -＞ I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) -at the -＞ top of qio_channel_socket_new so I think that's safe isn't it? -＞ - -Hmm, you are right, this problem is only exist for the migration incoming fd, -thanks. - -＞ Dave -＞ -＞＞ Thanks, -＞＞ Hailiang -＞＞ -＞＞ On 2017/3/21 16:10, address@hidden wrote: -＞＞＞ Thank you。 -＞＞＞ -＞＞＞ I have test aready。 -＞＞＞ -＞＞＞ When the Primary Node panic,the Secondary Node qemu hang at the same place。 -＞＞＞ -＞＞＞ Incorrding -http://wiki.qemu-project.org/Features/COLO -，kill Primary Node -qemu will not produce the problem,but Primary Node panic can。 -＞＞＞ -＞＞＞ I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. -＞＞＞ -＞＞＞ -＞＞＞ when failover,channel_shutdown could not shut down the channel. -＞＞＞ -＞＞＞ -＞＞＞ so the colo_process_incoming_thread will hang at recvmsg. 
-ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ I test a patch: -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ diff --git a/migration/socket.c b/migration/socket.c -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ index 13966f1..d65a0ea 100644 -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ --- a/migration/socket.c -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ +++ b/migration/socket.c -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ @@ -147,8 +147,9 @@ static gboolean -socket_accept_incoming_migration(QIOChannel *ioc, -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ } -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ trace_migration_socket_incoming_accepted() -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ migration_channel_process_incoming(migrate_get_current(), -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ QIO_CHANNEL(sioc)) -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ object_unref(OBJECT(sioc)) -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ My test will not hang any more. -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ åå§é®ä»¶ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ åä»¶äººï¼ address@hidden -ï¼ï¼ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden -ï¼ï¼ï¼ æéäººï¼ address@hidden address@hidden -ï¼ï¼ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -ï¼ï¼ï¼ 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ Hi,Wang. -ï¼ï¼ï¼ -ï¼ï¼ï¼ You can test this branch: -ï¼ï¼ï¼ -ï¼ï¼ï¼ -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -ï¼ï¼ï¼ -ï¼ï¼ï¼ and please follow wiki ensure your own configuration correctly. -ï¼ï¼ï¼ -ï¼ï¼ï¼ -http://wiki.qemu-project.org/Features/COLO -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ Thanks -ï¼ï¼ï¼ -ï¼ï¼ï¼ Zhang Chen -ï¼ï¼ï¼ -ï¼ï¼ï¼ -ï¼ï¼ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ hi. -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ I test the git qemu master have the same problem. 
-ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) bt -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ï¼ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ï¼ï¼ ï¼ (address@hidden, address@hidden "", -ï¼ï¼ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ï¼ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ï¼ï¼ ï¼ migration/qemu-file.c:295 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ï¼ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ï¼ï¼ ï¼ migration/qemu-file.c:568 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ï¼ï¼ ï¼ migration/qemu-file.c:648 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ï¼ï¼ ï¼ address@hidden) at migration/colo.c:244 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ï¼ï¼ ï¼ outï¼, address@hidden, -ï¼ï¼ï¼ ï¼ address@hidden) -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ at migration/colo.c:264 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ï¼ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $3 = 0 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ 
(gdb) bt -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ï¼ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ï¼ï¼ ï¼ gmain.c:3054 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ address@hidden) at gmain.c:3630 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ï¼ï¼ ï¼ util/main-loop.c:258 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #5 main_loop_wait (address@hidden) at -ï¼ï¼ï¼ ï¼ util/main-loop.c:506 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ï¼ï¼ ï¼ outï¼) at vl.c:4709 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $1 = 6 -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ May be socket_accept_incoming_migration should -ï¼ï¼ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ thank you. -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ åå§é®ä»¶ -ï¼ï¼ï¼ ï¼ address@hidden -ï¼ï¼ï¼ ï¼ address@hidden -ï¼ï¼ï¼ ï¼ address@hidden@huawei.comï¼ -ï¼ï¼ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ï¼ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ï¼ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ï¼ï¼ ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ï¼ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. 
-ï¼ï¼ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ï¼ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ï¼ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ï¼ï¼ ï¼ ï¼ Do the colo have any plan for development? -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ In our internal version can run it successfully, -ï¼ï¼ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. -ï¼ï¼ï¼ ï¼ Next time if you have some question about COLO, -ï¼ï¼ï¼ ï¼ please cc me and zhanghailiang address@hidden -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ Thanks -ï¼ï¼ï¼ ï¼ Zhang Chen -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ï¼ï¼ ï¼ ï¼ (gdb) bt -ï¼ï¼ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ï¼ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) -at -ï¼ï¼ï¼ ï¼ ï¼ io/channel-socket.c:497 -ï¼ï¼ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden "", address@hidden, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ï¼ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ï¼ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ï¼ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:257 -ï¼ï¼ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ï¼ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:523 -ï¼ï¼ï¼ ï¼ ï¼ 
#7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:603 -ï¼ï¼ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/colo.c:215 -ï¼ï¼ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ï¼ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:546 -ï¼ï¼ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:649 -ï¼ï¼ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ï¼ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -- -ï¼ï¼ï¼ ï¼ ï¼ View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ï¼ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -- -ï¼ï¼ï¼ ï¼ Thanks -ï¼ï¼ï¼ ï¼ Zhang Chen -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ ï¼ -ï¼ï¼ï¼ -ï¼ï¼ -ï¼ -- -ï¼ Dr. David Alan Gilbert / address@hidden / Manchester, UK -ï¼ -ï¼ . 
-＞ - diff --git a/results/classifier/014/peripherals/60339453 b/results/classifier/014/peripherals/60339453 deleted file mode 100644 index a0503d6c..00000000 --- a/results/classifier/014/peripherals/60339453 +++ /dev/null @@ -1,88 +0,0 @@ -peripherals: 0.824 -kernel: 0.805 -register: 0.787 -boot: 0.782 -arm: 0.780 -performance: 0.764 -permissions: 0.750 -TCG: 0.748 -alpha: 0.718 -VMM: 0.712 -risc-v: 0.707 -device: 0.706 -mistranslation: 0.699 -hypervisor: 0.697 -PID: 0.685 -network: 0.682 -vnc: 0.680 -debug: 0.672 -graphic: 0.671 -operating system: 0.670 -KVM: 0.669 -user-level: 0.663 -semantic: 0.662 -architecture: 0.649 -x86: 0.647 -virtual: 0.630 -files: 0.623 -ppc: 0.615 -socket: 0.607 -i386: 0.533 -assembly: 0.486 - -[BUG] scsi: vmw_pvscsi: Boot hangs during scsi under qemu, post commit e662502b3a78 - -Hi, - -Commit e662502b3a78 ("scsi: vmw_pvscsi: Set correct residual data length"), -and its backports to stable trees, makes kernel hang during boot, when -ran as a VM under qemu with following parameters: - - -drive file=$DISKFILE,if=none,id=sda - -device pvscsi - -device scsi-hd,bus=scsi.0,drive=sda - -Diving deeper, commit e662502b3a78 - - @@ -585,7 +585,13 @@ static void pvscsi_complete_request(struct -pvscsi_adapter *adapter, - case BTSTAT_SUCCESS: - + /* - + * Commands like INQUIRY may transfer less data than - + * requested by the initiator via bufflen. Set residual - + * count to make upper layer aware of the actual amount - + * of data returned. - + */ - + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); - -assumes 'e->dataLen' is properly armed with actual num of bytes -transferred; alas qemu's hw/scsi/vmw_pvscsi.c never arms the 'dataLen' -field of the completion descriptor (kept zero). - -As a result, the residual count is set as the *entire* 'scsi_bufflen' of a -good transfer, which makes upper scsi layers repeatedly ignore this -valid transfer. - -Not properly arming 'dataLen' seems as an oversight in qemu, which needs -to be fixed. 
- -However, since kernels with commit e662502b3a78 (and backports) now fail -to boot under qemu's "-device pvscsi", a suggested workaround is to set -the residual count *only* if 'e->dataLen' is armed, e.g: - - @@ -588,7 +588,8 @@ static void pvscsi_complete_request(struct pvscsi_adapter -*adapter, - * count to make upper layer aware of the actual -amount - * of data returned. - */ - - scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); - + if (e->dataLen) - + scsi_set_resid(cmd, scsi_bufflen(cmd) - -e->dataLen); - -in order to make kernels boot on old qemu binaries. - -Best, -Shmulik - |