summary refs log tree commit diff stats
path: root/results/classifier/014/peripherals
diff options
context:
space:
mode:
Diffstat (limited to 'results/classifier/014/peripherals')
-rw-r--r--results/classifier/014/peripherals/56937788371
-rw-r--r--results/classifier/014/peripherals/577565891448
-rw-r--r--results/classifier/014/peripherals/6033945388
3 files changed, 1907 insertions, 0 deletions
diff --git a/results/classifier/014/peripherals/56937788 b/results/classifier/014/peripherals/56937788
new file mode 100644
index 00000000..82617706
--- /dev/null
+++ b/results/classifier/014/peripherals/56937788
@@ -0,0 +1,371 @@
+peripherals: 0.807
+user-level: 0.794
+risc-v: 0.773
+hypervisor: 0.765
+TCG: 0.760
+KVM: 0.755
+vnc: 0.743
+mistranslation: 0.735
+VMM: 0.731
+virtual: 0.730
+ppc: 0.728
+debug: 0.723
+graphic: 0.720
+operating system: 0.713
+register: 0.706
+semantic: 0.705
+device: 0.697
+i386: 0.694
+x86: 0.693
+performance: 0.692
+permissions: 0.685
+files: 0.680
+arm: 0.665
+assembly: 0.638
+boot: 0.636
+network: 0.633
+alpha: 0.631
+architecture: 0.627
+PID: 0.620
+socket: 0.613
+kernel: 0.594
+
+[Qemu-devel] [Bug] virtio-blk: qemu will crash if hotplug virtio-blk device failed
+
+I found that hotplug virtio-blk device will lead to qemu crash.
+
+Re-production steps:
+
+1.       Run VM named vm001
+
+2.       Create a virtio-blk.xml which contains wrong configurations:
+<disk device="lun" rawio="yes" type="block">
+  <driver cache="none" io="native" name="qemu" type="raw" />
+  <source dev="/dev/mapper/11-dm" />
+  <target bus="virtio" dev="vdx" />
+</disk>
+
+3.       Run command : virsh attach-device vm001 vm001
+
+Libvirt will return err msg:
+
+error: Failed to attach device from blk-scsi.xml
+
+error: internal error: unable to execute QEMU command 'device_add': Please set 
+scsi=off for virtio-blk devices in order to use virtio 1.0
+
+it means hotplug virtio-blk device failed.
+
+4.       Suspending or shutting down the VM will lead to a qemu crash
+
+
+
+from gdb:
+
+
+(gdb) bt
+#0  object_get_class (address@hidden) at qom/object.c:750
+#1  0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, 
+running=0, state=<optimized out>) at 
+/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203
+#2  0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at 
+vl.c:1685
+#3  0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at 
+/mnt/sdb/lzc/code/open/qemu/cpus.c:941
+#4  vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807
+#5  0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102
+#6  0x00007f9a7262c70a in qmp_marshal_stop (args=<optimized out>, 
+ret=<optimized out>, errp=0x7ffe63e255d8) at qmp-marshal.c:5854
+#7  0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, 
+request=0x7f9a76510120, cmds=0x7f9a72ee7980 <qmp_commands>) at 
+qapi/qmp-dispatch.c:104
+#8  qmp_dispatch (cmds=0x7f9a72ee7980 <qmp_commands>, address@hidden) at 
+qapi/qmp-dispatch.c:131
+#9  0x00007f9a725288d5 in handle_qmp_command (parser=<optimized out>, 
+tokens=<optimized out>) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852
+#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, 
+input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at 
+qobject/json-streamer.c:105
+#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', 
+address@hidden) at qobject/json-lexer.c:323
+#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, 
+buffer=<optimized out>, size=<optimized out>) at qobject/json-lexer.c:373
+#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=<optimized out>, 
+buffer=<optimized out>, size=<optimized out>) at qobject/json-streamer.c:124
+#14 0x00007f9a7252722e in monitor_qmp_read (opaque=<optimized out>, 
+buf=<optimized out>, size=<optimized out>) at 
+/mnt/sdb/lzc/code/open/qemu/monitor.c:3894
+#15 0x00007f9a7284ee1b in tcp_chr_read (chan=<optimized out>, cond=<optimized 
+out>, opaque=<optimized out>) at chardev/char-socket.c:441
+#16 0x00007f9a6e03e99a in g_main_context_dispatch () from 
+/usr/lib64/libglib-2.0.so.0
+#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214
+#18 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261
+#19 main_loop_wait (address@hidden) at util/main-loop.c:515
+#20 0x00007f9a724e7547 in main_loop () at vl.c:1999
+#21 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at 
+vl.c:4877
+
+Problem happens in virtio_vmstate_change which is called by vm_state_notify,
+static void virtio_vmstate_change(void *opaque, int running, RunState state)
+{
+    VirtIODevice *vdev = opaque;
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+    vdev->vm_running = running;
+
+    if (backend_run) {
+        virtio_set_status(vdev, vdev->status);
+    }
+
+    if (k->vmstate_change) {
+        k->vmstate_change(qbus->parent, backend_run);
+    }
+
+    if (!backend_run) {
+        virtio_set_status(vdev, vdev->status);
+    }
+}
+
+Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash.
+virtio_vmstate_change is added to the list vm_change_state_head at 
+virtio_blk_device_realize(virtio_init),
+but after hotplug virtio-blk failed, virtio_vmstate_change will not be removed 
+from vm_change_state_head.
+
+
+I apply a patch as follows:
+
+diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
+index 5884ce3..ea532dc 100644
+--- a/hw/virtio/virtio.c
++++ b/hw/virtio/virtio.c
+@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, Error 
+**errp)
+     virtio_bus_device_plugged(vdev, &err);
+     if (err != NULL) {
+         error_propagate(errp, err);
++        vdc->unrealize(dev, NULL);
+         return;
+     }
+
+On Tue, Oct 31, 2017 at 05:19:08AM +0000, linzhecheng wrote:
+>
+I found that hotplug virtio-blk device will lead to qemu crash.
+The author posted a patch in a separate email thread.  Please see
+"[PATCH] fix: unrealize virtio device if we fail to hotplug it".
+
+>
+Re-production steps:
+>
+>
+1.       Run VM named vm001
+>
+>
+2.       Create a virtio-blk.xml which contains wrong configurations:
+>
+<disk device="lun" rawio="yes" type="block">
+>
+<driver cache="none" io="native" name="qemu" type="raw" />
+>
+<source dev="/dev/mapper/11-dm" />
+>
+<target bus="virtio" dev="vdx" />
+>
+</disk>
+>
+>
+3.       Run command : virsh attach-device vm001 vm001
+>
+>
+Libvirt will return err msg:
+>
+>
+error: Failed to attach device from blk-scsi.xml
+>
+>
+error: internal error: unable to execute QEMU command 'device_add': Please
+>
+set scsi=off for virtio-blk devices in order to use virtio 1.0
+>
+>
+it means hotplug virtio-blk device failed.
+>
+>
+4.       Suspending or shutting down the VM will lead to a qemu crash
+>
+>
+>
+>
+from gdb:
+>
+>
+>
+(gdb) bt
+>
+#0  object_get_class (address@hidden) at qom/object.c:750
+>
+#1  0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960,
+>
+running=0, state=<optimized out>) at
+>
+/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203
+>
+#2  0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at
+>
+vl.c:1685
+>
+#3  0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at
+>
+/mnt/sdb/lzc/code/open/qemu/cpus.c:941
+>
+#4  vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807
+>
+#5  0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102
+>
+#6  0x00007f9a7262c70a in qmp_marshal_stop (args=<optimized out>,
+>
+ret=<optimized out>, errp=0x7ffe63e255d8) at qmp-marshal.c:5854
+>
+#7  0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0,
+>
+request=0x7f9a76510120, cmds=0x7f9a72ee7980 <qmp_commands>) at
+>
+qapi/qmp-dispatch.c:104
+>
+#8  qmp_dispatch (cmds=0x7f9a72ee7980 <qmp_commands>, address@hidden) at
+>
+qapi/qmp-dispatch.c:131
+>
+#9  0x00007f9a725288d5 in handle_qmp_command (parser=<optimized out>,
+>
+tokens=<optimized out>) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852
+>
+#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498,
+>
+input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at
+>
+qobject/json-streamer.c:105
+>
+#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}',
+>
+address@hidden) at qobject/json-lexer.c:323
+>
+#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498,
+>
+buffer=<optimized out>, size=<optimized out>) at qobject/json-lexer.c:373
+>
+#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=<optimized out>,
+>
+buffer=<optimized out>, size=<optimized out>) at qobject/json-streamer.c:124
+>
+#14 0x00007f9a7252722e in monitor_qmp_read (opaque=<optimized out>,
+>
+buf=<optimized out>, size=<optimized out>) at
+>
+/mnt/sdb/lzc/code/open/qemu/monitor.c:3894
+>
+#15 0x00007f9a7284ee1b in tcp_chr_read (chan=<optimized out>, cond=<optimized
+>
+out>, opaque=<optimized out>) at chardev/char-socket.c:441
+>
+#16 0x00007f9a6e03e99a in g_main_context_dispatch () from
+>
+/usr/lib64/libglib-2.0.so.0
+>
+#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214
+>
+#18 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261
+>
+#19 main_loop_wait (address@hidden) at util/main-loop.c:515
+>
+#20 0x00007f9a724e7547 in main_loop () at vl.c:1999
+>
+#21 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>)
+>
+at vl.c:4877
+>
+>
+Problem happens in virtio_vmstate_change which is called by vm_state_notify,
+>
+static void virtio_vmstate_change(void *opaque, int running, RunState state)
+>
+{
+>
+VirtIODevice *vdev = opaque;
+>
+BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+>
+VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+>
+bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+>
+vdev->vm_running = running;
+>
+>
+if (backend_run) {
+>
+virtio_set_status(vdev, vdev->status);
+>
+}
+>
+>
+if (k->vmstate_change) {
+>
+k->vmstate_change(qbus->parent, backend_run);
+>
+}
+>
+>
+if (!backend_run) {
+>
+virtio_set_status(vdev, vdev->status);
+>
+}
+>
+}
+>
+>
+Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash.
+>
+virtio_vmstate_change is added to the list vm_change_state_head at
+>
+virtio_blk_device_realize(virtio_init),
+>
+but after hotplug virtio-blk failed, virtio_vmstate_change will not be
+>
+removed from vm_change_state_head.
+>
+>
+>
+I apply a patch as follows:
+>
+>
+diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
+>
+index 5884ce3..ea532dc 100644
+>
+--- a/hw/virtio/virtio.c
+>
++++ b/hw/virtio/virtio.c
+>
+@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev,
+>
+Error **errp)
+>
+virtio_bus_device_plugged(vdev, &err);
+>
+if (err != NULL) {
+>
+error_propagate(errp, err);
+>
++        vdc->unrealize(dev, NULL);
+>
+return;
+>
+}
+signature.asc
+Description:
+PGP signature
+
diff --git a/results/classifier/014/peripherals/57756589 b/results/classifier/014/peripherals/57756589
new file mode 100644
index 00000000..5891931d
--- /dev/null
+++ b/results/classifier/014/peripherals/57756589
@@ -0,0 +1,1448 @@
+peripherals: 0.875
+hypervisor: 0.863
+mistranslation: 0.861
+register: 0.858
+architecture: 0.856
+device: 0.853
+vnc: 0.851
+virtual: 0.845
+permissions: 0.842
+assembly: 0.841
+performance: 0.839
+ppc: 0.838
+semantic: 0.835
+operating system: 0.835
+TCG: 0.833
+VMM: 0.833
+arm: 0.828
+boot: 0.827
+user-level: 0.826
+graphic: 0.824
+network: 0.822
+socket: 0.820
+PID: 0.819
+KVM: 0.817
+kernel: 0.817
+files: 0.816
+x86: 0.814
+alpha: 0.810
+debug: 0.803
+i386: 0.782
+risc-v: 0.755
+
+[Qemu-devel] 答复: Re:   答复: Re:  答复: Re: [BUG]COLO failover hang
+
+amost like wiki,but panic in Primary Node.
+
+
+
+
+setp:
+
+1 
+
+Primary Node.
+
+x86_64-softmmu/qemu-system-x86_64 -enable-kvm -boot c -m 2048 -smp 2 -qmp stdio 
+-vnc :7 -name primary -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb 
+-usbdevice tablet\
+
+  -drive 
+if=virtio,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,
+
+   
+children.0.file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,children.0.driver=qcow2
+ -S \
+
+  -netdev 
+tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \
+
+  -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \
+
+  -netdev 
+tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \
+
+  -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 \
+
+  -chardev socket,id=mirror0,host=9.61.1.8,port=9003,server,nowait -chardev 
+socket,id=compare1,host=9.61.1.8,port=9004,server,nowait \
+
+  -chardev socket,id=compare0,host=9.61.1.8,port=9001,server,nowait -chardev 
+socket,id=compare0-0,host=9.61.1.8,port=9001 \
+
+  -chardev socket,id=compare_out,host=9.61.1.8,port=9005,server,nowait \
+
+  -chardev socket,id=compare_out0,host=9.61.1.8,port=9005 \
+
+  -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \
+
+  -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out 
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \
+
+  -object 
+colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0
+
+2 Second node:
+
+x86_64-softmmu/qemu-system-x86_64 -boot c -m 2048 -smp 2 -qmp stdio -vnc :7 
+-name secondary -enable-kvm -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb 
+-usbdevice tablet\
+
+  -drive 
+if=none,id=colo-disk0,file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,driver=qcow2,node-name=node0
+ \
+
+  -drive 
+if=virtio,id=active-disk0,driver=replication,mode=secondary,file.driver=qcow2,top-id=active-disk0,file.file.filename=/mnt/ramfstest/active_disk.img,file.backing.driver=qcow2,file.backing.file.filename=/mnt/ramfstest/hidden_disk.img,file.backing.backing=colo-disk0
+  \
+
+   -netdev 
+tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \
+
+  -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \
+
+  -netdev 
+tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \
+
+  -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 -chardev 
+socket,id=red0,host=9.61.1.8,port=9003 \
+
+  -chardev socket,id=red1,host=9.61.1.8,port=9004 \
+
+  -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \
+
+  -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \
+
+  -object filter-rewriter,id=rew0,netdev=hn0,queue=all -incoming tcp:0:8888
+
+3  Secondary node:
+
+{'execute':'qmp_capabilities'}
+
+{ 'execute': 'nbd-server-start',
+
+  'arguments': {'addr': {'type': 'inet', 'data': {'host': '9.61.1.7', 'port': 
+'8889'} } }
+
+}
+
+{'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk0', 'writable': 
+true } }
+
+4:Primary Node:
+
+{'execute':'qmp_capabilities'}
+
+
+{ 'execute': 'human-monitor-command',
+
+  'arguments': {'command-line': 'drive_add -n buddy 
+driver=replication,mode=primary,file.driver=nbd,file.host=9.61.1.7,file.port=8889,file.export=colo-disk0,node-name=node0'}}
+
+{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 
+'node0' } }
+
+{ 'execute': 'migrate-set-capabilities',
+
+      'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } 
+] } }
+
+{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:9.61.1.7:8888' } }
+
+
+
+
+then can see two runing VMs, whenever you make changes to PVM, SVM will be 
+synced.  
+
+
+
+
+5:Primary Node:
+
+echo c > /proc/sysrq-trigger
+
+
+
+
+6:Secondary node:
+
+{ 'execute': 'nbd-server-stop' }
+
+{ "execute": "x-colo-lost-heartbeat" }
+
+
+
+
+then can see the Secondary node hang at recvmsg.
+
+
+
+
+
+
+
+
+
+
+
+
+原始邮件
+
+
+
+发件人: address@hidden
+收件人:王广10165992 address@hidden
+抄送人: address@hidden address@hidden
+日 期 :2017年03月21日 16:27
+主 题 :Re: [Qemu-devel]  答复: Re:  答复: Re: [BUG]COLO failover hang
+
+
+
+
+
+Hi,
+
+On 2017/3/21 16:10, address@hidden wrote:
+> Thank you。
+>
+> I have test aready。
+>
+> When the Primary Node panic,the Secondary Node qemu hang at the same place。
+>
+> Incorrding
+http://wiki.qemu-project.org/Features/COLO
+,kill Primary Node qemu 
+will not produce the problem,but Primary Node panic can。
+>
+> I think due to the feature of channel does not support 
+QIO_CHANNEL_FEATURE_SHUTDOWN.
+>
+>
+
+Yes, you are right, when we do failover for primary/secondary VM, we will 
+shutdown the related
+fd in case it is stuck in the read/write fd.
+
+It seems that you didn't follow the above introduction exactly to do the test. 
+Could you
+share your test procedures ? Especially the commands used in the test.
+
+Thanks,
+Hailiang
+
+> when failover,channel_shutdown could not shut down the channel.
+>
+>
+> so the colo_process_incoming_thread will hang at recvmsg.
+>
+>
+> I test a patch:
+>
+>
+> diff --git a/migration/socket.c b/migration/socket.c
+>
+>
+> index 13966f1..d65a0ea 100644
+>
+>
+> --- a/migration/socket.c
+>
+>
+> +++ b/migration/socket.c
+>
+>
+> @@ -147,8 +147,9 @@ static gboolean 
+socket_accept_incoming_migration(QIOChannel *ioc,
+>
+>
+>       }
+>
+>
+>
+>
+>
+>       trace_migration_socket_incoming_accepted()
+>
+>
+>
+>
+>
+>       qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming")
+>
+>
+> +    qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN)
+>
+>
+>       migration_channel_process_incoming(migrate_get_current(),
+>
+>
+>                                          QIO_CHANNEL(sioc))
+>
+>
+>       object_unref(OBJECT(sioc))
+>
+>
+>
+>
+> My test will not hang any more.
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+>
+> 原始邮件
+>
+>
+>
+> 发件人: address@hidden
+> 收件人:王广10165992 address@hidden
+> 抄送人: address@hidden address@hidden
+> 日 期 :2017年03月21日 15:58
+> 主 题 :Re: [Qemu-devel]  答复: Re:  [BUG]COLO failover hang
+>
+>
+>
+>
+>
+> Hi,Wang.
+>
+> You can test this branch:
+>
+>
+https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk
+>
+> and please follow wiki ensure your own configuration correctly.
+>
+>
+http://wiki.qemu-project.org/Features/COLO
+>
+>
+> Thanks
+>
+> Zhang Chen
+>
+>
+> On 03/21/2017 03:27 PM, address@hidden wrote:
+> >
+> > hi.
+> >
+> > I test the git qemu master have the same problem.
+> >
+> > (gdb) bt
+> >
+> > #0  qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880,
+> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461
+> >
+> > #1  0x00007f658e4aa0c2 in qio_channel_read
+> > (address@hidden, address@hidden "",
+> > address@hidden, address@hidden) at io/channel.c:114
+> >
+> > #2  0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>,
+> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at
+> > migration/qemu-file-channel.c:78
+> >
+> > #3  0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at
+> > migration/qemu-file.c:295
+> >
+> > #4  0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden,
+> > address@hidden) at migration/qemu-file.c:555
+> >
+> > #5  0x00007f658e3ea34b in qemu_get_byte (address@hidden) at
+> > migration/qemu-file.c:568
+> >
+> > #6  0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at
+> > migration/qemu-file.c:648
+> >
+> > #7  0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800,
+> > address@hidden) at migration/colo.c:244
+> >
+> > #8  0x00007f658e3e681e in colo_receive_check_message (f=<optimized
+> > out>, address@hidden,
+> > address@hidden)
+> >
+> >     at migration/colo.c:264
+> >
+> > #9  0x00007f658e3e740e in colo_process_incoming_thread
+> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577
+> >
+> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0
+> >
+> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6
+> >
+> > (gdb) p ioc->name
+> >
+> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming"
+> >
+> > (gdb) p ioc->features        Do not support QIO_CHANNEL_FEATURE_SHUTDOWN
+> >
+> > $3 = 0
+> >
+> >
+> > (gdb) bt
+> >
+> > #0  socket_accept_incoming_migration (ioc=0x7fdcceeafa90,
+> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137
+> >
+> > #1  0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at
+> > gmain.c:3054
+> >
+> > #2  g_main_context_dispatch (context=<optimized out>,
+> > address@hidden) at gmain.c:3630
+> >
+> > #3  0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213
+> >
+> > #4  os_host_main_loop_wait (timeout=<optimized out>) at
+> > util/main-loop.c:258
+> >
+> > #5  main_loop_wait (address@hidden) at
+> > util/main-loop.c:506
+> >
+> > #6  0x00007fdccb526187 in main_loop () at vl.c:1898
+> >
+> > #7  main (argc=<optimized out>, argv=<optimized out>, envp=<optimized
+> > out>) at vl.c:4709
+> >
+> > (gdb) p ioc->features
+> >
+> > $1 = 6
+> >
+> > (gdb) p ioc->name
+> >
+> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener"
+> >
+> >
+> > May be socket_accept_incoming_migration should
+> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)??
+> >
+> >
+> > thank you.
+> >
+> >
+> >
+> >
+> >
+> > 原始邮件
+> > address@hidden
+> > address@hidden
+> > address@hidden@huawei.com>
+> > *日 期 :*2017年03月16日 14:46
+> > *主 题 :**Re: [Qemu-devel] COLO failover hang*
+> >
+> >
+> >
+> >
+> > On 03/15/2017 05:06 PM, wangguang wrote:
+> > >   am testing QEMU COLO feature described here [QEMU
+> > > Wiki](
+http://wiki.qemu-project.org/Features/COLO
+).
+> > >
+> > > When the Primary Node panic,the Secondary Node qemu hang.
+> > > hang at recvmsg in qio_channel_socket_readv.
+> > > And  I run  { 'execute': 'nbd-server-stop' } and { "execute":
+> > > "x-colo-lost-heartbeat" } in Secondary VM's
+> > > monitor,the  Secondary Node qemu still hang at recvmsg .
+> > >
+> > > I found that the colo in qemu is not complete yet.
+> > > Do the colo have any plan for development?
+> >
+> > Yes, We are developing. You can see some of patch we pushing.
+> >
+> > > Has anyone ever run it successfully? Any help is appreciated!
+> >
+> > In our internal version can run it successfully,
+> > The failover detail you can ask Zhanghailiang for help.
+> > Next time if you have some question about COLO,
+> > please cc me and zhanghailiang address@hidden
+> >
+> >
+> > Thanks
+> > Zhang Chen
+> >
+> >
+> > >
+> > >
+> > >
+> > > centos7.2+qemu2.7.50
+> > > (gdb) bt
+> > > #0  0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0
+> > > #1  0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>,
+> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) at
+> > > io/channel-socket.c:497
+> > > #2  0x00007f3e03329472 in qio_channel_read (address@hidden,
+> > > address@hidden "", address@hidden,
+> > > address@hidden) at io/channel.c:97
+> > > #3  0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>,
+> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at
+> > > migration/qemu-file-channel.c:78
+> > > #4  0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at
+> > > migration/qemu-file.c:257
+> > > #5  0x00007f3e03274a41 in qemu_peek_byte (address@hidden,
+> > > address@hidden) at migration/qemu-file.c:510
+> > > #6  0x00007f3e03274aab in qemu_get_byte (address@hidden) at
+> > > migration/qemu-file.c:523
+> > > #7  0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at
+> > > migration/qemu-file.c:603
+> > > #8  0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00,
+>>> > > address@hidden) at migration/colo.c:215
+> > > #9  0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48,
+> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at
+> > > migration/colo.c:546
+> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at
+> > > migration/colo.c:649
+> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0
+> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6
+> > >
+> > >
+> > >
+> > >
+> > >
+> > > --
+> > > View this message in context:
+http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html
+> > > Sent from the Developer mailing list archive at Nabble.com.
+> > >
+> > >
+> > >
+> > >
+> >
+> > --
+> > Thanks
+> > Zhang Chen
+> >
+> >
+> >
+> >
+> >
+>
+
+diff --git a/migration/socket.c b/migration/socket.c
+
+
+index 13966f1..d65a0ea 100644
+
+
+--- a/migration/socket.c
+
+
++++ b/migration/socket.c
+
+
+@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel 
+*ioc,
+
+
+     }
+
+
+ 
+
+
+     trace_migration_socket_incoming_accepted()
+
+
+    
+
+
+     qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming")
+
+
++    qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN)
+
+
+     migration_channel_process_incoming(migrate_get_current(),
+
+
+                                        QIO_CHANNEL(sioc))
+
+
+     object_unref(OBJECT(sioc))
+
+
+
+
+Is this patch ok? 
+
+I have test it . The test could not hang any more.
+
+
+
+
+
+
+
+
+
+
+
+
+原始邮件
+
+
+
+发件人: address@hidden
+收件人: address@hidden address@hidden
+抄送人: address@hidden address@hidden address@hidden
+日 期 :2017年03月22日 09:11
+主 题 :Re: [Qemu-devel]  答复: Re:  答复: Re: [BUG]COLO failover hang
+
+
+
+
+
+On 2017/3/21 19:56, Dr. David Alan Gilbert wrote:
+> * Hailiang Zhang (address@hidden) wrote:
+>> Hi,
+>>
+>> Thanks for reporting this, and i confirmed it in my test, and it is a bug.
+>>
+>> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in
+>> case COLO thread/incoming thread is stuck in read/write() while do failover,
+>> but it didn't take effect, because all the fd used by COLO (also migration)
+>> has been wrapped by qio channel, and it will not call the shutdown API if
+>> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), 
+QIO_CHANNEL_FEATURE_SHUTDOWN).
+>>
+>> Cc: Dr. David Alan Gilbert address@hidden
+>>
+>> I doubted migration cancel has the same problem, it may be stuck in write()
+>> if we tried to cancel migration.
+>>
+>> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, 
+Error **errp)
+>> {
+>>      qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing")
+>>      migration_channel_connect(s, ioc, NULL)
+>>      ... ...
+>> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), 
+QIO_CHANNEL_FEATURE_SHUTDOWN) above,
+>> and the
+>> migrate_fd_cancel()
+>> {
+>>   ... ...
+>>      if (s->state == MIGRATION_STATUS_CANCELLING && f) {
+>>          qemu_file_shutdown(f)  --> This will not take effect. No ?
+>>      }
+>> }
+>
+> (cc'd in Daniel Berrange).
+> I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) 
+at the
+> top of qio_channel_socket_new  so I think that's safe isn't it?
+>
+
+Hmm, you are right, this problem is only exist for the migration incoming fd, 
+thanks.
+
+> Dave
+>
+>> Thanks,
+>> Hailiang
+>>
+>> On 2017/3/21 16:10, address@hidden wrote:
+>>> Thank you。
+>>>
+>>> I have test aready。
+>>>
+>>> When the Primary Node panic,the Secondary Node qemu hang at the same place。
+>>>
+>>> Incorrding
+http://wiki.qemu-project.org/Features/COLO
+,kill Primary Node 
+qemu will not produce the problem,but Primary Node panic can。
+>>>
+>>> I think due to the feature of channel does not support 
+QIO_CHANNEL_FEATURE_SHUTDOWN.
+>>>
+>>>
+>>> when failover,channel_shutdown could not shut down the channel.
+>>>
+>>>
+>>> so the colo_process_incoming_thread will hang at recvmsg.
+>>>
+>>>
+>>> I test a patch:
+>>>
+>>>
+>>> diff --git a/migration/socket.c b/migration/socket.c
+>>>
+>>>
+>>> index 13966f1..d65a0ea 100644
+>>>
+>>>
+>>> --- a/migration/socket.c
+>>>
+>>>
+>>> +++ b/migration/socket.c
+>>>
+>>>
+>>> @@ -147,8 +147,9 @@ static gboolean 
+socket_accept_incoming_migration(QIOChannel *ioc,
+>>>
+>>>
+>>>        }
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>        trace_migration_socket_incoming_accepted()
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>        qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming")
+>>>
+>>>
+>>> +    qio_channel_set_feature(QIO_CHANNEL(sioc), 
+QIO_CHANNEL_FEATURE_SHUTDOWN)
+>>>
+>>>
+>>>        migration_channel_process_incoming(migrate_get_current(),
+>>>
+>>>
+>>>                                           QIO_CHANNEL(sioc))
+>>>
+>>>
+>>>        object_unref(OBJECT(sioc))
+>>>
+>>>
+>>>
+>>>
+>>> My test will not hang any more.
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>> 原始邮件
+>>>
+>>>
+>>>
+>>> 发件人: address@hidden
+>>> 收件人:王广10165992 address@hidden
+>>> 抄送人: address@hidden address@hidden
+>>> 日 期 :2017年03月21日 15:58
+>>> 主 题 :Re: [Qemu-devel]  答复: Re:  [BUG]COLO failover hang
+>>>
+>>>
+>>>
+>>>
+>>>
+>>> Hi,Wang.
+>>>
+>>> You can test this branch:
+>>>
+>>>
+https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk
+>>>
+>>> and please follow wiki ensure your own configuration correctly.
+>>>
+>>>
+http://wiki.qemu-project.org/Features/COLO
+>>>
+>>>
+>>> Thanks
+>>>
+>>> Zhang Chen
+>>>
+>>>
+>>> On 03/21/2017 03:27 PM, address@hidden wrote:
+>>> >
+>>> > hi.
+>>> >
+>>> > I test the git qemu master have the same problem.
+>>> >
+>>> > (gdb) bt
+>>> >
+>>> > #0  qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880,
+>>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461
+>>> >
+>>> > #1  0x00007f658e4aa0c2 in qio_channel_read
+>>> > (address@hidden, address@hidden "",
+>>> > address@hidden, address@hidden) at io/channel.c:114
+>>> >
+>>> > #2  0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>,
+>>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at
+>>> > migration/qemu-file-channel.c:78
+>>> >
+>>> > #3  0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at
+>>> > migration/qemu-file.c:295
+>>> >
+>>> > #4  0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden,
+>>> > address@hidden) at migration/qemu-file.c:555
+>>> >
+>>> > #5  0x00007f658e3ea34b in qemu_get_byte (address@hidden) at
+>>> > migration/qemu-file.c:568
+>>> >
+>>> > #6  0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at
+>>> > migration/qemu-file.c:648
+>>> >
+>>> > #7  0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800,
+>>> > address@hidden) at migration/colo.c:244
+>>> >
+>>> > #8  0x00007f658e3e681e in colo_receive_check_message (f=<optimized
+>>> > out>, address@hidden,
+>>> > address@hidden)
+>>> >
+>>> >     at migration/colo.c:264
+>>> >
+>>> > #9  0x00007f658e3e740e in colo_process_incoming_thread
+>>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577
+>>> >
+>>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0
+>>> >
+>>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6
+>>> >
+>>> > (gdb) p ioc->name
+>>> >
+>>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming"
+>>> >
+>>> > (gdb) p ioc->features        Do not support QIO_CHANNEL_FEATURE_SHUTDOWN
+>>> >
+>>> > $3 = 0
+>>> >
+>>> >
+>>> > (gdb) bt
+>>> >
+>>> > #0  socket_accept_incoming_migration (ioc=0x7fdcceeafa90,
+>>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137
+>>> >
+>>> > #1  0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at
+>>> > gmain.c:3054
+>>> >
+>>> > #2  g_main_context_dispatch (context=<optimized out>,
+>>> > address@hidden) at gmain.c:3630
+>>> >
+>>> > #3  0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213
+>>> >
+>>> > #4  os_host_main_loop_wait (timeout=<optimized out>) at
+>>> > util/main-loop.c:258
+>>> >
+>>> > #5  main_loop_wait (address@hidden) at
+>>> > util/main-loop.c:506
+>>> >
+>>> > #6  0x00007fdccb526187 in main_loop () at vl.c:1898
+>>> >
+>>> > #7  main (argc=<optimized out>, argv=<optimized out>, envp=<optimized
+>>> > out>) at vl.c:4709
+>>> >
+>>> > (gdb) p ioc->features
+>>> >
+>>> > $1 = 6
+>>> >
+>>> > (gdb) p ioc->name
+>>> >
+>>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener"
+>>> >
+>>> >
+>>> > May be socket_accept_incoming_migration should
+>>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)??
+>>> >
+>>> >
+>>> > thank you.
+>>> >
+>>> >
+>>> >
+>>> >
+>>> >
+>>> > 原始邮件
+>>> > address@hidden
+>>> > address@hidden
+>>> > address@hidden@huawei.com>
+>>> > *日 期 :*2017年03月16日 14:46
+>>> > *主 题 :**Re: [Qemu-devel] COLO failover hang*
+>>> >
+>>> >
+>>> >
+>>> >
+>>> > On 03/15/2017 05:06 PM, wangguang wrote:
+>>> > >   am testing QEMU COLO feature described here [QEMU
+>>> > > Wiki](
+http://wiki.qemu-project.org/Features/COLO
+).
+>>> > >
+>>> > > When the Primary Node panic,the Secondary Node qemu hang.
+>>> > > hang at recvmsg in qio_channel_socket_readv.
+>>> > > And  I run  { 'execute': 'nbd-server-stop' } and { "execute":
+>>> > > "x-colo-lost-heartbeat" } in Secondary VM's
+>>> > > monitor,the  Secondary Node qemu still hang at recvmsg .
+>>> > >
+>>> > > I found that the colo in qemu is not complete yet.
+>>> > > Do the colo have any plan for development?
+>>> >
+>>> > Yes, We are developing. You can see some of patch we pushing.
+>>> >
+>>> > > Has anyone ever run it successfully? Any help is appreciated!
+>>> >
+>>> > In our internal version can run it successfully,
+>>> > The failover detail you can ask Zhanghailiang for help.
+>>> > Next time if you have some question about COLO,
+>>> > please cc me and zhanghailiang address@hidden
+>>> >
+>>> >
+>>> > Thanks
+>>> > Zhang Chen
+>>> >
+>>> >
+>>> > >
+>>> > >
+>>> > >
+>>> > > centos7.2+qemu2.7.50
+>>> > > (gdb) bt
+>>> > > #0  0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0
+>>> > > #1  0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>,
+>>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) 
+at
+>>> > > io/channel-socket.c:497
+>>> > > #2  0x00007f3e03329472 in qio_channel_read (address@hidden,
+>>> > > address@hidden "", address@hidden,
+>>> > > address@hidden) at io/channel.c:97
+>>> > > #3  0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>,
+>>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at
+>>> > > migration/qemu-file-channel.c:78
+>>> > > #4  0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at
+>>> > > migration/qemu-file.c:257
+>>> > > #5  0x00007f3e03274a41 in qemu_peek_byte (address@hidden,
+>>> > > address@hidden) at migration/qemu-file.c:510
+>>> > > #6  0x00007f3e03274aab in qemu_get_byte (address@hidden) at
+>>> > > migration/qemu-file.c:523
+>>> > > #7  0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at
+>>> > > migration/qemu-file.c:603
+>>> > > #8  0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00,
+>>> > > address@hidden) at migration/colo.c:215
+>>> > > #9  0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48,
+>>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at
+>>> > > migration/colo.c:546
+>>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at
+>>> > > migration/colo.c:649
+>>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0
+>>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6
+>>> > >
+>>> > >
+>>> > >
+>>> > >
+>>> > >
+>>> > > --
+>>> > > View this message in context:
+http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html
+>>> > > Sent from the Developer mailing list archive at Nabble.com.
+>>> > >
+>>> > >
+>>> > >
+>>> > >
+>>> >
+>>> > --
+>>> > Thanks
+>>> > Zhang Chen
+>>> >
+>>> >
+>>> >
+>>> >
+>>> >
+>>>
+>>
+> --
+> Dr. David Alan Gilbert / address@hidden / Manchester, UK
+>
+> .
+>
+
+Hi,
+
+On 2017/3/22 9:42, address@hidden wrote:
+diff --git a/migration/socket.c b/migration/socket.c
+
+
+index 13966f1..d65a0ea 100644
+
+
+--- a/migration/socket.c
+
+
++++ b/migration/socket.c
+
+
+@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel 
+*ioc,
+
+
+      }
+
+
+
+
+
+      trace_migration_socket_incoming_accepted()
+
+
+
+
+
+      qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming")
+
+
++    qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN)
+
+
+      migration_channel_process_incoming(migrate_get_current(),
+
+
+                                         QIO_CHANNEL(sioc))
+
+
+      object_unref(OBJECT(sioc))
+
+
+
+
+Is this patch ok?
+Yes, I think this works, but a better way may be to call 
+qio_channel_set_feature()
+in qio_channel_socket_accept(); we didn't set the SHUTDOWN feature for the 
+socket accept fd.
+Or fix it by this:
+
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index f546c68..ce6894c 100644
+--- a/io/channel-socket.c
++++ b/io/channel-socket.c
+@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
+                           Error **errp)
+ {
+     QIOChannelSocket *cioc;
+-
+-    cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
+-    cioc->fd = -1;
++
++    cioc = qio_channel_socket_new();
+     cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
+     cioc->localAddrLen = sizeof(ioc->localAddr);
+
+
+Thanks,
+Hailiang
+I have tested it. The test no longer hangs.
+
+
+
+
+
+
+
+
+
+
+
+
+原始邮件
+
+
+
+发件人: address@hidden
+收件人: address@hidden address@hidden
+抄送人: address@hidden address@hidden address@hidden
+日 期 :2017年03月22日 09:11
+主 题 :Re: [Qemu-devel]  答复: Re:  答复: Re: [BUG]COLO failover hang
+
+
+
+
+
+On 2017/3/21 19:56, Dr. David Alan Gilbert wrote:
+> * Hailiang Zhang (address@hidden) wrote:
+>> Hi,
+>>
+>> Thanks for reporting this, and i confirmed it in my test, and it is a bug.
+>>
+>> Though we tried to call qemu_file_shutdown() to shutdown the related fd, in
+>> case COLO thread/incoming thread is stuck in read/write() while do failover,
+>> but it didn't take effect, because all the fd used by COLO (also migration)
+>> has been wrapped by qio channel, and it will not call the shutdown API if
+>> we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), 
+QIO_CHANNEL_FEATURE_SHUTDOWN).
+>>
+>> Cc: Dr. David Alan Gilbert address@hidden
+>>
+>> I doubted migration cancel has the same problem, it may be stuck in write()
+>> if we tried to cancel migration.
+>>
+>> void fd_start_outgoing_migration(MigrationState *s, const char *fdname, 
+Error **errp)
+>> {
+>>      qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing")
+>>      migration_channel_connect(s, ioc, NULL)
+>>      ... ...
+>> We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), 
+QIO_CHANNEL_FEATURE_SHUTDOWN) above,
+>> and the
+>> migrate_fd_cancel()
+>> {
+>>   ... ...
+>>      if (s->state == MIGRATION_STATUS_CANCELLING && f) {
+>>          qemu_file_shutdown(f)  --> This will not take effect. No ?
+>>      }
+>> }
+>
+> (cc'd in Daniel Berrange).
+> I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) 
+at the
+> top of qio_channel_socket_new  so I think that's safe isn't it?
+>
+
+Hmm, you are right, this problem only exists for the migration incoming fd, 
+thanks.
+
+> Dave
+>
+>> Thanks,
+>> Hailiang
+>>
+>> On 2017/3/21 16:10, address@hidden wrote:
+>>> Thank you。
+>>>
+>>> I have test aready。
+>>>
+>>> When the Primary Node panic,the Secondary Node qemu hang at the same place。
+>>>
+>>> According to
+http://wiki.qemu-project.org/Features/COLO
+, killing the Primary Node 
+qemu will not produce the problem, but a Primary Node panic can.
+>>>
+>>> I think it is because the channel does not support 
+QIO_CHANNEL_FEATURE_SHUTDOWN.
+>>>
+>>>
+>>> when failover occurs, channel_shutdown could not shut down the channel.
+>>>
+>>>
+>>> so the colo_process_incoming_thread will hang at recvmsg.
+>>>
+>>>
+>>> I test a patch:
+>>>
+>>>
+>>> diff --git a/migration/socket.c b/migration/socket.c
+>>>
+>>>
+>>> index 13966f1..d65a0ea 100644
+>>>
+>>>
+>>> --- a/migration/socket.c
+>>>
+>>>
+>>> +++ b/migration/socket.c
+>>>
+>>>
+>>> @@ -147,8 +147,9 @@ static gboolean 
+socket_accept_incoming_migration(QIOChannel *ioc,
+>>>
+>>>
+>>>        }
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>        trace_migration_socket_incoming_accepted()
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>        qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming")
+>>>
+>>>
+>>> +    qio_channel_set_feature(QIO_CHANNEL(sioc), 
+QIO_CHANNEL_FEATURE_SHUTDOWN)
+>>>
+>>>
+>>>        migration_channel_process_incoming(migrate_get_current(),
+>>>
+>>>
+>>>                                           QIO_CHANNEL(sioc))
+>>>
+>>>
+>>>        object_unref(OBJECT(sioc))
+>>>
+>>>
+>>>
+>>>
+>>> My test will not hang any more.
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>>
+>>> 原始邮件
+>>>
+>>>
+>>>
+>>> 发件人: address@hidden
+>>> 收件人:王广10165992 address@hidden
+>>> 抄送人: address@hidden address@hidden
+>>> 日 期 :2017年03月21日 15:58
+>>> 主 题 :Re: [Qemu-devel]  答复: Re:  [BUG]COLO failover hang
+>>>
+>>>
+>>>
+>>>
+>>>
+>>> Hi,Wang.
+>>>
+>>> You can test this branch:
+>>>
+>>>
+https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk
+>>>
+>>> and please follow the wiki to ensure your configuration is correct.
+>>>
+>>>
+http://wiki.qemu-project.org/Features/COLO
+>>>
+>>>
+>>> Thanks
+>>>
+>>> Zhang Chen
+>>>
+>>>
+>>> On 03/21/2017 03:27 PM, address@hidden wrote:
+>>> >
+>>> > hi.
+>>> >
+>>> > I test the git qemu master have the same problem.
+>>> >
+>>> > (gdb) bt
+>>> >
+>>> > #0  qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880,
+>>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461
+>>> >
+>>> > #1  0x00007f658e4aa0c2 in qio_channel_read
+>>> > (address@hidden, address@hidden "",
+>>> > address@hidden, address@hidden) at io/channel.c:114
+>>> >
+>>> > #2  0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>,
+>>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at
+>>> > migration/qemu-file-channel.c:78
+>>> >
+>>> > #3  0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at
+>>> > migration/qemu-file.c:295
+>>> >
+>>> > #4  0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden,
+>>> > address@hidden) at migration/qemu-file.c:555
+>>> >
+>>> > #5  0x00007f658e3ea34b in qemu_get_byte (address@hidden) at
+>>> > migration/qemu-file.c:568
+>>> >
+>>> > #6  0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at
+>>> > migration/qemu-file.c:648
+>>> >
+>>> > #7  0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800,
+>>> > address@hidden) at migration/colo.c:244
+>>> >
+>>> > #8  0x00007f658e3e681e in colo_receive_check_message (f=<optimized
+>>> > out>, address@hidden,
+>>> > address@hidden)
+>>> >
+>>> >     at migration/colo.c:264
+>>> >
+>>> > #9  0x00007f658e3e740e in colo_process_incoming_thread
+>>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577
+>>> >
+>>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0
+>>> >
+>>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6
+>>> >
+>>> > (gdb) p ioc->name
+>>> >
+>>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming"
+>>> >
+>>> > (gdb) p ioc->features        Do not support QIO_CHANNEL_FEATURE_SHUTDOWN
+>>> >
+>>> > $3 = 0
+>>> >
+>>> >
+>>> > (gdb) bt
+>>> >
+>>> > #0  socket_accept_incoming_migration (ioc=0x7fdcceeafa90,
+>>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137
+>>> >
+>>> > #1  0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at
+>>> > gmain.c:3054
+>>> >
+>>> > #2  g_main_context_dispatch (context=<optimized out>,
+>>> > address@hidden) at gmain.c:3630
+>>> >
+>>> > #3  0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213
+>>> >
+>>> > #4  os_host_main_loop_wait (timeout=<optimized out>) at
+>>> > util/main-loop.c:258
+>>> >
+>>> > #5  main_loop_wait (address@hidden) at
+>>> > util/main-loop.c:506
+>>> >
+>>> > #6  0x00007fdccb526187 in main_loop () at vl.c:1898
+>>> >
+>>> > #7  main (argc=<optimized out>, argv=<optimized out>, envp=<optimized
+>>> > out>) at vl.c:4709
+>>> >
+>>> > (gdb) p ioc->features
+>>> >
+>>> > $1 = 6
+>>> >
+>>> > (gdb) p ioc->name
+>>> >
+>>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener"
+>>> >
+>>> >
+>>> > May be socket_accept_incoming_migration should
+>>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)??
+>>> >
+>>> >
+>>> > thank you.
+>>> >
+>>> >
+>>> >
+>>> >
+>>> >
+>>> > 原始邮件
+>>> > address@hidden
+>>> > address@hidden
+>>> > address@hidden@huawei.com>
+>>> > *日 期 :*2017年03月16日 14:46
+>>> > *主 题 :**Re: [Qemu-devel] COLO failover hang*
+>>> >
+>>> >
+>>> >
+>>> >
+>>> > On 03/15/2017 05:06 PM, wangguang wrote:
+>>> > >   am testing QEMU COLO feature described here [QEMU
+>>> > > Wiki](
+http://wiki.qemu-project.org/Features/COLO
+).
+>>> > >
+>>> > > When the Primary Node panic,the Secondary Node qemu hang.
+>>> > > hang at recvmsg in qio_channel_socket_readv.
+>>> > > And  I run  { 'execute': 'nbd-server-stop' } and { "execute":
+>>> > > "x-colo-lost-heartbeat" } in Secondary VM's
+>>> > > monitor,the  Secondary Node qemu still hang at recvmsg .
+>>> > >
+>>> > > I found that the colo in qemu is not complete yet.
+>>> > > Do the colo have any plan for development?
+>>> >
+>>> > Yes, We are developing. You can see some of patch we pushing.
+>>> >
+>>> > > Has anyone ever run it successfully? Any help is appreciated!
+>>> >
+>>> > In our internal version can run it successfully,
+>>> > The failover detail you can ask Zhanghailiang for help.
+>>> > Next time if you have some question about COLO,
+>>> > please cc me and zhanghailiang address@hidden
+>>> >
+>>> >
+>>> > Thanks
+>>> > Zhang Chen
+>>> >
+>>> >
+>>> > >
+>>> > >
+>>> > >
+>>> > > centos7.2+qemu2.7.50
+>>> > > (gdb) bt
+>>> > > #0  0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0
+>>> > > #1  0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>,
+>>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) 
+at
+>>> > > io/channel-socket.c:497
+>>> > > #2  0x00007f3e03329472 in qio_channel_read (address@hidden,
+>>> > > address@hidden "", address@hidden,
+>>> > > address@hidden) at io/channel.c:97
+>>> > > #3  0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>,
+>>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at
+>>> > > migration/qemu-file-channel.c:78
+>>> > > #4  0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at
+>>> > > migration/qemu-file.c:257
+>>> > > #5  0x00007f3e03274a41 in qemu_peek_byte (address@hidden,
+>>> > > address@hidden) at migration/qemu-file.c:510
+>>> > > #6  0x00007f3e03274aab in qemu_get_byte (address@hidden) at
+>>> > > migration/qemu-file.c:523
+>>> > > #7  0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at
+>>> > > migration/qemu-file.c:603
+>>> > > #8  0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00,
+>>> > > address@hidden) at migration/colo.c:215
+>>> > > #9  0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48,
+>>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at
+>>> > > migration/colo.c:546
+>>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at
+>>> > > migration/colo.c:649
+>>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0
+>>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6
+>>> > >
+>>> > >
+>>> > >
+>>> > >
+>>> > >
+>>> > > --
+>>> > > View this message in context:
+http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html
+>>> > > Sent from the Developer mailing list archive at Nabble.com.
+>>> > >
+>>> > >
+>>> > >
+>>> > >
+>>> >
+>>> > --
+>>> > Thanks
+>>> > Zhang Chen
+>>> >
+>>> >
+>>> >
+>>> >
+>>> >
+>>>
+>>
+> --
+> Dr. David Alan Gilbert / address@hidden / Manchester, UK
+>
+> .
+>
+
diff --git a/results/classifier/014/peripherals/60339453 b/results/classifier/014/peripherals/60339453
new file mode 100644
index 00000000..a0503d6c
--- /dev/null
+++ b/results/classifier/014/peripherals/60339453
@@ -0,0 +1,88 @@
+peripherals: 0.824
+kernel: 0.805
+register: 0.787
+boot: 0.782
+arm: 0.780
+performance: 0.764
+permissions: 0.750
+TCG: 0.748
+alpha: 0.718
+VMM: 0.712
+risc-v: 0.707
+device: 0.706
+mistranslation: 0.699
+hypervisor: 0.697
+PID: 0.685
+network: 0.682
+vnc: 0.680
+debug: 0.672
+graphic: 0.671
+operating system: 0.670
+KVM: 0.669
+user-level: 0.663
+semantic: 0.662
+architecture: 0.649
+x86: 0.647
+virtual: 0.630
+files: 0.623
+ppc: 0.615
+socket: 0.607
+i386: 0.533
+assembly: 0.486
+
+[BUG] scsi: vmw_pvscsi: Boot hangs during scsi under qemu, post commit e662502b3a78
+
+Hi,
+
+Commit e662502b3a78 ("scsi: vmw_pvscsi: Set correct residual data length"),
+and its backports to stable trees, makes kernel hang during boot, when
+ran as a VM under qemu with following parameters:
+
+  -drive file=$DISKFILE,if=none,id=sda
+  -device pvscsi
+  -device scsi-hd,bus=scsi.0,drive=sda
+
+Diving deeper, commit e662502b3a78
+
+  @@ -585,7 +585,13 @@ static void pvscsi_complete_request(struct 
+pvscsi_adapter *adapter,
+                case BTSTAT_SUCCESS:
+  +                     /*
+  +                      * Commands like INQUIRY may transfer less data than
+  +                      * requested by the initiator via bufflen. Set residual
+  +                      * count to make upper layer aware of the actual amount
+  +                      * of data returned.
+  +                      */
+  +                     scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+
+assumes 'e->dataLen' is properly armed with actual num of bytes
+transferred; alas qemu's hw/scsi/vmw_pvscsi.c never arms the 'dataLen'
+field of the completion descriptor (kept zero).
+
+As a result, the residual count is set as the *entire* 'scsi_bufflen' of a
+good transfer, which makes upper scsi layers repeatedly ignore this
+valid transfer.
+
+Not properly arming 'dataLen' seems as an oversight in qemu, which needs
+to be fixed.
+
+However, since kernels with commit e662502b3a78 (and backports) now fail
+to boot under qemu's "-device pvscsi", a suggested workaround is to set
+the residual count *only* if 'e->dataLen' is armed, e.g:
+
+  @@ -588,7 +588,8 @@ static void pvscsi_complete_request(struct pvscsi_adapter 
+*adapter,
+                           * count to make upper layer aware of the actual 
+amount
+                           * of data returned.
+                           */
+  -                       scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
+  +                       if (e->dataLen)
+  +                               scsi_set_resid(cmd, scsi_bufflen(cmd) - 
+e->dataLen);
+
+in order to make kernels boot on old qemu binaries.
+
+Best,
+Shmulik
+