diff options
Diffstat (limited to 'results/classifier/015/unknown/57756589')
| -rw-r--r-- | results/classifier/015/unknown/57756589 | 1448 |
1 files changed, 1448 insertions, 0 deletions
diff --git a/results/classifier/015/unknown/57756589 b/results/classifier/015/unknown/57756589 new file mode 100644 index 000000000..5891931d8 --- /dev/null +++ b/results/classifier/015/unknown/57756589 @@ -0,0 +1,1448 @@ +peripherals: 0.875 +hypervisor: 0.863 +mistranslation: 0.861 +register: 0.858 +architecture: 0.856 +device: 0.853 +vnc: 0.851 +virtual: 0.845 +permissions: 0.842 +assembly: 0.841 +performance: 0.839 +ppc: 0.838 +semantic: 0.835 +operating system: 0.835 +TCG: 0.833 +VMM: 0.833 +arm: 0.828 +boot: 0.827 +user-level: 0.826 +graphic: 0.824 +network: 0.822 +socket: 0.820 +PID: 0.819 +KVM: 0.817 +kernel: 0.817 +files: 0.816 +x86: 0.814 +alpha: 0.810 +debug: 0.803 +i386: 0.782 +risc-v: 0.755 + +[Qemu-devel] 答复: Re: 答复: Re: 答复: Re: [BUG]COLO failover hang + +almost like the wiki, but panic in Primary Node. + + + + +step: + +1 + +Primary Node. + +x86_64-softmmu/qemu-system-x86_64 -enable-kvm -boot c -m 2048 -smp 2 -qmp stdio +-vnc :7 -name primary -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb +-usbdevice tablet\ + + -drive +if=virtio,id=colo-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1, + + +children.0.file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,children.0.driver=qcow2 + -S \ + + -netdev +tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \ + + -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \ + + -netdev +tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \ + + -device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 \ + + -chardev socket,id=mirror0,host=9.61.1.8,port=9003,server,nowait -chardev +socket,id=compare1,host=9.61.1.8,port=9004,server,nowait \ + + -chardev socket,id=compare0,host=9.61.1.8,port=9001,server,nowait -chardev +socket,id=compare0-0,host=9.61.1.8,port=9001 \ + + -chardev socket,id=compare_out,host=9.61.1.8,port=9005,server,nowait \ + + -chardev socket,id=compare_out0,host=9.61.1.8,port=9005 \ + + -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 \ + + 
-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out +-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 \ + + -object +colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 + +2 Secondary node: + +x86_64-softmmu/qemu-system-x86_64 -boot c -m 2048 -smp 2 -qmp stdio -vnc :7 +-name secondary -enable-kvm -cpu qemu64,+kvmclock -device piix3-usb-uhci -usb +-usbdevice tablet\ + + -drive +if=none,id=colo-disk0,file.filename=/mnt/sdd/pure_IMG/linux/redhat/rhel_6.5_64_2U_ide,driver=qcow2,node-name=node0 + \ + + -drive +if=virtio,id=active-disk0,driver=replication,mode=secondary,file.driver=qcow2,top-id=active-disk0,file.file.filename=/mnt/ramfstest/active_disk.img,file.backing.driver=qcow2,file.backing.file.filename=/mnt/ramfstest/hidden_disk.img,file.backing.backing=colo-disk0 + \ + + -netdev +tap,id=hn1,vhost=off,script=/etc/qemu-ifup2,downscript=/etc/qemu-ifdown2 \ + + -device e1000,id=e1,netdev=hn1,mac=52:a4:00:12:78:67 \ + + -netdev +tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown \ + + -device e1000,netdev=hn0,mac=52:a4:00:12:78:66 -chardev +socket,id=red0,host=9.61.1.8,port=9003 \ + + -chardev socket,id=red1,host=9.61.1.8,port=9004 \ + + -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 \ + + -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 \ + + -object filter-rewriter,id=rew0,netdev=hn0,queue=all -incoming tcp:0:8888 + +3 Secondary node: + +{'execute':'qmp_capabilities'} + +{ 'execute': 'nbd-server-start', + + 'arguments': {'addr': {'type': 'inet', 'data': {'host': '9.61.1.7', 'port': +'8889'} } } + +} + +{'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk0', 'writable': +true } } + +4:Primary Node: + +{'execute':'qmp_capabilities'} + + +{ 'execute': 'human-monitor-command', + + 'arguments': {'command-line': 'drive_add -n buddy 
+driver=replication,mode=primary,file.driver=nbd,file.host=9.61.1.7,file.port=8889,file.export=colo-disk0,node-name=node0'}} + +{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': +'node0' } } + +{ 'execute': 'migrate-set-capabilities', + + 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } +] } } + +{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:9.61.1.7:8888' } } + + + + +then can see two running VMs, whenever you make changes to PVM, SVM will be +synced. + + + + +5:Primary Node: + +echo c > /proc/sysrq-trigger + + + + +6:Secondary node: + +{ 'execute': 'nbd-server-stop' } + +{ "execute": "x-colo-lost-heartbeat" } + + + + +then can see the Secondary node hang at recvmsg. + + + + + + + + + + + + +原始邮件 + + + +发件人： address@hidden +收件人：王广10165992 address@hidden +抄送人： address@hidden address@hidden +日 期 ：2017年03月21日 16:27 +主 题 ：Re: [Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang + + + + + +Hi, + +On 2017/3/21 16:10, address@hidden wrote: +> Thank you. +> +> I have tested already. +> +> When the Primary Node panics, the Secondary Node qemu hangs at the same place. +> +> According to +http://wiki.qemu-project.org/Features/COLO +, kill Primary Node qemu +will not produce the problem, but Primary Node panic can. +> +> I think it is because the channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +> +> + +Yes, you are right, when we do failover for primary/secondary VM, we will +shutdown the related +fd in case it is stuck in the read/write fd. + +It seems that you didn't follow the above introduction exactly to do the test. +Could you +share your test procedures ? Especially the commands used in the test. + +Thanks, +Hailiang + +> when failover, channel_shutdown could not shut down the channel. +> +> +> so the colo_process_incoming_thread will hang at recvmsg. 
+ï¼ +ï¼ +ï¼ I test a patch: +ï¼ +ï¼ +ï¼ diff --git a/migration/socket.c b/migration/socket.c +ï¼ +ï¼ +ï¼ index 13966f1..d65a0ea 100644 +ï¼ +ï¼ +ï¼ --- a/migration/socket.c +ï¼ +ï¼ +ï¼ +++ b/migration/socket.c +ï¼ +ï¼ +ï¼ @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +ï¼ +ï¼ +ï¼ } +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ trace_migration_socket_incoming_accepted() +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +ï¼ +ï¼ +ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) +ï¼ +ï¼ +ï¼ migration_channel_process_incoming(migrate_get_current(), +ï¼ +ï¼ +ï¼ QIO_CHANNEL(sioc)) +ï¼ +ï¼ +ï¼ object_unref(OBJECT(sioc)) +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ My test will not hang any more. +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ åå§é®ä»¶ +ï¼ +ï¼ +ï¼ +ï¼ åä»¶äººï¼ address@hidden +ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden +ï¼ æéäººï¼ address@hidden address@hidden +ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +ï¼ ä¸» é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ Hi,Wang. +ï¼ +ï¼ You can test this branch: +ï¼ +ï¼ +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +ï¼ +ï¼ and please follow wiki ensure your own configuration correctly. +ï¼ +ï¼ +http://wiki.qemu-project.org/Features/COLO +ï¼ +ï¼ +ï¼ Thanks +ï¼ +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ ï¼ +ï¼ ï¼ hi. +ï¼ ï¼ +ï¼ ï¼ I test the git qemu master have the same problem. 
+ï¼ ï¼ +ï¼ ï¼ (gdb) bt +ï¼ ï¼ +ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ ï¼ +ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ ï¼ (address@hidden, address@hidden "", +ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ ï¼ +ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ +ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ ï¼ migration/qemu-file.c:295 +ï¼ ï¼ +ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ ï¼ +ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:568 +ï¼ ï¼ +ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:648 +ï¼ ï¼ +ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ ï¼ address@hidden) at migration/colo.c:244 +ï¼ ï¼ +ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ ï¼ outï¼, address@hidden, +ï¼ ï¼ address@hidden) +ï¼ ï¼ +ï¼ ï¼ at migration/colo.c:264 +ï¼ ï¼ +ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ ï¼ +ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ +ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ ï¼ +ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ ï¼ +ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ ï¼ +ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ ï¼ +ï¼ ï¼ $3 = 0 +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ (gdb) bt +ï¼ ï¼ +ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ ï¼ +ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch 
(context=ï¼optimized outï¼) at +ï¼ ï¼ gmain.c:3054 +ï¼ ï¼ +ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ ï¼ address@hidden) at gmain.c:3630 +ï¼ ï¼ +ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ ï¼ +ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ ï¼ util/main-loop.c:258 +ï¼ ï¼ +ï¼ ï¼ #5 main_loop_wait (address@hidden) at +ï¼ ï¼ util/main-loop.c:506 +ï¼ ï¼ +ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ ï¼ +ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ ï¼ outï¼) at vl.c:4709 +ï¼ ï¼ +ï¼ ï¼ (gdb) p ioc-ï¼features +ï¼ ï¼ +ï¼ ï¼ $1 = 6 +ï¼ ï¼ +ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ ï¼ +ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ May be socket_accept_incoming_migration should +ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ thank you. +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ åå§é®ä»¶ +ï¼ ï¼ address@hidden +ï¼ ï¼ address@hidden +ï¼ ï¼ address@hidden@huawei.comï¼ +ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ ï¼ Do the colo have any plan for development? +ï¼ ï¼ +ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ ï¼ +ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! 
+ï¼ ï¼ +ï¼ ï¼ In our internal version can run it successfully, +ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. +ï¼ ï¼ Next time if you have some question about COLO, +ï¼ ï¼ please cc me and zhanghailiang address@hidden +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ Thanks +ï¼ ï¼ Zhang Chen +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ ï¼ (gdb) bt +ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ ï¼ address@hidden) at migration/colo..c:215 +ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ ï¼ migration/colo.c:649 +ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from 
/lib64/libc.so.6 +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ -- +ï¼ ï¼ ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ -- +ï¼ ï¼ Thanks +ï¼ ï¼ Zhang Chen +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +Is this patch ok? + +I have test it . The test could not hang any more. + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ address@hidden address@hidden +æéäººï¼ address@hidden address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ22æ¥ 09:11 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: [BUG]COLO failover hang + + + + + +On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +ï¼ * Hailiang Zhang (address@hidden) wrote: +ï¼ï¼ Hi, +ï¼ï¼ +ï¼ï¼ Thanks for reporting this, and i confirmed it in my test, and it is a bug. +ï¼ï¼ +ï¼ï¼ Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +ï¼ï¼ case COLO thread/incoming thread is stuck in read/write() while do failover, +ï¼ï¼ but it didn't take effect, because all the fd used by COLO (also migration) +ï¼ï¼ has been wrapped by qio channel, and it will not call the shutdown API if +ï¼ï¼ we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +ï¼ï¼ +ï¼ï¼ Cc: Dr. 
David Alan Gilbert address@hidden +ï¼ï¼ +ï¼ï¼ I doubted migration cancel has the same problem, it may be stuck in write() +ï¼ï¼ if we tried to cancel migration. +ï¼ï¼ +ï¼ï¼ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +ï¼ï¼ { +ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +ï¼ï¼ migration_channel_connect(s, ioc, NULL) +ï¼ï¼ ... ... +ï¼ï¼ We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +ï¼ï¼ and the +ï¼ï¼ migrate_fd_cancel() +ï¼ï¼ { +ï¼ï¼ ... ... +ï¼ï¼ if (s-ï¼state == MIGRATION_STATUS_CANCELLING && f) { +ï¼ï¼ qemu_file_shutdown(f) --ï¼ This will not take effect. No ? +ï¼ï¼ } +ï¼ï¼ } +ï¼ +ï¼ (cc'd in Daniel Berrange). +ï¼ I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) +at the +ï¼ top of qio_channel_socket_new so I think that's safe isn't it? +ï¼ + +Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. + +ï¼ Dave +ï¼ +ï¼ï¼ Thanks, +ï¼ï¼ Hailiang +ï¼ï¼ +ï¼ï¼ On 2017/3/21 16:10, address@hidden wrote: +ï¼ï¼ï¼ Thank youã +ï¼ï¼ï¼ +ï¼ï¼ï¼ I have test areadyã +ï¼ï¼ï¼ +ï¼ï¼ï¼ When the Primary Node panic,the Secondary Node qemu hang at the same placeã +ï¼ï¼ï¼ +ï¼ï¼ï¼ Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node +qemu will not produce the problem,but Primary Node panic canã +ï¼ï¼ï¼ +ï¼ï¼ï¼ I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ when failover,channel_shutdown could not shut down the channel. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ so the colo_process_incoming_thread will hang at recvmsg. 
+ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ I test a patch: +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ diff --git a/migration/socket.c b/migration/socket.c +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ index 13966f1..d65a0ea 100644 +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ --- a/migration/socket.c +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +++ b/migration/socket.c +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ } +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ trace_migration_socket_incoming_accepted() +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ migration_channel_process_incoming(migrate_get_current(), +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ QIO_CHANNEL(sioc)) +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ object_unref(OBJECT(sioc)) +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ My test will not hang any more. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ åå§é®ä»¶ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ åä»¶äººï¼ address@hidden +ï¼ï¼ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden +ï¼ï¼ï¼ æéäººï¼ address@hidden address@hidden +ï¼ï¼ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +ï¼ï¼ï¼ 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ Hi,Wang. +ï¼ï¼ï¼ +ï¼ï¼ï¼ You can test this branch: +ï¼ï¼ï¼ +ï¼ï¼ï¼ +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +ï¼ï¼ï¼ +ï¼ï¼ï¼ and please follow wiki ensure your own configuration correctly. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +http://wiki.qemu-project.org/Features/COLO +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ Thanks +ï¼ï¼ï¼ +ï¼ï¼ï¼ Zhang Chen +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ hi. +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ I test the git qemu master have the same problem. 
+ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ (gdb) bt +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ï¼ï¼ ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ï¼ï¼ ï¼ (address@hidden, address@hidden "", +ï¼ï¼ï¼ ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ï¼ï¼ ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ï¼ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ï¼ï¼ ï¼ migration/qemu-file.c:295 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ï¼ï¼ ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ï¼ï¼ ï¼ migration/qemu-file.c:568 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ï¼ï¼ ï¼ migration/qemu-file.c:648 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ï¼ï¼ ï¼ address@hidden) at migration/colo.c:244 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ï¼ï¼ ï¼ outï¼, address@hidden, +ï¼ï¼ï¼ ï¼ address@hidden) +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ at migration/colo.c:264 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ï¼ï¼ ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ $3 = 0 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ 
(gdb) bt +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ï¼ï¼ ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ï¼ï¼ ï¼ gmain.c:3054 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ï¼ï¼ ï¼ address@hidden) at gmain.c:3630 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ï¼ï¼ ï¼ util/main-loop.c:258 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #5 main_loop_wait (address@hidden) at +ï¼ï¼ï¼ ï¼ util/main-loop.c:506 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ï¼ï¼ ï¼ outï¼) at vl.c:4709 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼features +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ $1 = 6 +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ (gdb) p ioc-ï¼name +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ May be socket_accept_incoming_migration should +ï¼ï¼ï¼ ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ thank you. +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ åå§é®ä»¶ +ï¼ï¼ï¼ ï¼ address@hidden +ï¼ï¼ï¼ ï¼ address@hidden +ï¼ï¼ï¼ ï¼ address@hidden@huawei.comï¼ +ï¼ï¼ï¼ ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ï¼ï¼ ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ï¼ï¼ ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ï¼ï¼ ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ï¼ï¼ ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. 
+ï¼ï¼ï¼ ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ï¼ï¼ ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ï¼ï¼ ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ï¼ï¼ ï¼ ï¼ Do the colo have any plan for development? +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ In our internal version can run it successfully, +ï¼ï¼ï¼ ï¼ The failover detail you can ask Zhanghailiang for help. +ï¼ï¼ï¼ ï¼ Next time if you have some question about COLO, +ï¼ï¼ï¼ ï¼ please cc me and zhanghailiang address@hidden +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ Thanks +ï¼ï¼ï¼ ï¼ Zhang Chen +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ï¼ï¼ ï¼ ï¼ (gdb) bt +ï¼ï¼ï¼ ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ï¼ï¼ ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ï¼ï¼ ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) +at +ï¼ï¼ï¼ ï¼ ï¼ io/channel-socket.c:497 +ï¼ï¼ï¼ ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ï¼ï¼ ï¼ ï¼ address@hidden "", address@hidden, +ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ï¼ï¼ ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ï¼ï¼ ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ï¼ï¼ ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:257 +ï¼ï¼ï¼ ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ï¼ï¼ ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:523 +ï¼ï¼ï¼ ï¼ ï¼ 
#7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ï¼ï¼ ï¼ ï¼ migration/qemu-file.c:603 +ï¼ï¼ï¼ ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ï¼ï¼ ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ï¼ï¼ ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ï¼ï¼ ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:546 +ï¼ï¼ï¼ ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ï¼ï¼ ï¼ ï¼ migration/colo.c:649 +ï¼ï¼ï¼ ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ï¼ï¼ ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ -- +ï¼ï¼ï¼ ï¼ ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ï¼ï¼ ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ -- +ï¼ï¼ï¼ ï¼ Thanks +ï¼ï¼ï¼ ï¼ Zhang Chen +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ +ï¼ï¼ +ï¼ -- +ï¼ Dr. David Alan Gilbert / address@hidden / Manchester, UK +ï¼ +ï¼ . +ï¼ + +Hi, + +On 2017/3/22 9:42, address@hidden wrote: +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +Is this patch ok? 
+Yes, i think this works, but a better way maybe to call +qio_channel_set_feature() +in qio_channel_socket_accept(), we didn't set the SHUTDOWN feature for the +socket accept fd, +Or fix it by this: + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index f546c68..ce6894c 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -330,9 +330,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + Error **errp) + { + QIOChannelSocket *cioc; +- +- cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); +- cioc->fd = -1; ++ ++ cioc = qio_channel_socket_new(); + cioc->remoteAddrLen = sizeof(ioc->remoteAddr); + cioc->localAddrLen = sizeof(ioc->localAddr); + + +Thanks, +Hailiang +I have test it . The test could not hang any more. + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ address@hidden address@hidden +æéäººï¼ address@hidden address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ22æ¥ 09:11 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: çå¤: Re: [BUG]COLO failover hang + + + + + +On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +ï¼ * Hailiang Zhang (address@hidden) wrote: +ï¼ï¼ Hi, +ï¼ï¼ +ï¼ï¼ Thanks for reporting this, and i confirmed it in my test, and it is a bug. +ï¼ï¼ +ï¼ï¼ Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +ï¼ï¼ case COLO thread/incoming thread is stuck in read/write() while do failover, +ï¼ï¼ but it didn't take effect, because all the fd used by COLO (also migration) +ï¼ï¼ has been wrapped by qio channel, and it will not call the shutdown API if +ï¼ï¼ we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). +ï¼ï¼ +ï¼ï¼ Cc: Dr. David Alan Gilbert address@hidden +ï¼ï¼ +ï¼ï¼ I doubted migration cancel has the same problem, it may be stuck in write() +ï¼ï¼ if we tried to cancel migration. 
+ï¼ï¼ +ï¼ï¼ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +Error **errp) +ï¼ï¼ { +ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing") +ï¼ï¼ migration_channel_connect(s, ioc, NULL) +ï¼ï¼ ... ... +ï¼ï¼ We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +ï¼ï¼ and the +ï¼ï¼ migrate_fd_cancel() +ï¼ï¼ { +ï¼ï¼ ... ... +ï¼ï¼ if (s-ï¼state == MIGRATION_STATUS_CANCELLING && f) { +ï¼ï¼ qemu_file_shutdown(f) --ï¼ This will not take effect. No ? +ï¼ï¼ } +ï¼ï¼ } +ï¼ +ï¼ (cc'd in Daniel Berrange). +ï¼ I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN) +at the +ï¼ top of qio_channel_socket_new so I think that's safe isn't it? +ï¼ + +Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. + +ï¼ Dave +ï¼ +ï¼ï¼ Thanks, +ï¼ï¼ Hailiang +ï¼ï¼ +ï¼ï¼ On 2017/3/21 16:10, address@hidden wrote: +ï¼ï¼ï¼ Thank youã +ï¼ï¼ï¼ +ï¼ï¼ï¼ I have test areadyã +ï¼ï¼ï¼ +ï¼ï¼ï¼ When the Primary Node panic,the Secondary Node qemu hang at the same placeã +ï¼ï¼ï¼ +ï¼ï¼ï¼ Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node +qemu will not produce the problem,but Primary Node panic canã +ï¼ï¼ï¼ +ï¼ï¼ï¼ I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ when failover,channel_shutdown could not shut down the channel. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ so the colo_process_incoming_thread will hang at recvmsg. 
+ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ I test a patch: +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ diff --git a/migration/socket.c b/migration/socket.c +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ index 13966f1..d65a0ea 100644 +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ --- a/migration/socket.c +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +++ b/migration/socket.c +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ @@ -147,8 +147,9 @@ static gboolean +socket_accept_incoming_migration(QIOChannel *ioc, +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ } +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ trace_migration_socket_incoming_accepted() +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ + qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ migration_channel_process_incoming(migrate_get_current(), +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ QIO_CHANNEL(sioc)) +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ object_unref(OBJECT(sioc)) +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ My test will not hang any more. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ åå§é®ä»¶ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ åä»¶äººï¼ address@hidden +ï¼ï¼ï¼ æ¶ä»¶äººï¼ç广10165992 address@hidden +ï¼ï¼ï¼ æéäººï¼ address@hidden address@hidden +ï¼ï¼ï¼ æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +ï¼ï¼ï¼ 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ Hi,Wang. +ï¼ï¼ï¼ +ï¼ï¼ï¼ You can test this branch: +ï¼ï¼ï¼ +ï¼ï¼ï¼ +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +ï¼ï¼ï¼ +ï¼ï¼ï¼ and please follow wiki ensure your own configuration correctly. +ï¼ï¼ï¼ +ï¼ï¼ï¼ +http://wiki.qemu-project.org/Features/COLO +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ Thanks +ï¼ï¼ï¼ +ï¼ï¼ï¼ Zhang Chen +ï¼ï¼ï¼ +ï¼ï¼ï¼ +ï¼ï¼ï¼ On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ hi. +ï¼ï¼ï¼ ï¼ +ï¼ï¼ï¼ ï¼ I test the git qemu master have the same problem. 
+>>> > +>>> > (gdb) bt +>>> > +>>> > #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +>>> > niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +>>> > +>>> > #1 0x00007f658e4aa0c2 in qio_channel_read +>>> > (address@hidden, address@hidden "", +>>> > address@hidden, address@hidden) at io/channel.c:114 +>>> > +>>> > #2 0x00007f658e3ea990 in channel_get_buffer (opaque=<optimized out>, +>>> > buf=0x7f65907cb838 "", pos=<optimized out>, size=32768) at +>>> > migration/qemu-file-channel.c:78 +>>> > +>>> > #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +>>> > migration/qemu-file.c:295 +>>> > +>>> > #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +>>> > address@hidden) at migration/qemu-file.c:555 +>>> > +>>> > #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +>>> > migration/qemu-file.c:568 +>>> > +>>> > #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +>>> > migration/qemu-file.c:648 +>>> > +>>> > #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +>>> > address@hidden) at migration/colo.c:244 +>>> > +>>> > #8 0x00007f658e3e681e in colo_receive_check_message (f=<optimized +>>> > out>, address@hidden, +>>> > address@hidden) +>>> > +>>> > at migration/colo.c:264 +>>> > +>>> > #9 0x00007f658e3e740e in colo_process_incoming_thread +>>> > (opaque=0x7f658eb30360 <mis_current.31286>) at migration/colo.c:577 +>>> > +>>> > #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +>>> > +>>> > #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +>>> > +>>> > (gdb) p ioc->name +>>> > +>>> > $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +>>> > +>>> > (gdb) p ioc->features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +>>> > +>>> > $3 = 0 +>>> > +>>> > +>>> > 
(gdb) bt +>>> > +>>> > #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +>>> > condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +>>> > +>>> > #1 0x00007fdcc6966350 in g_main_dispatch (context=<optimized out>) at +>>> > gmain.c:3054 +>>> > +>>> > #2 g_main_context_dispatch (context=<optimized out>, +>>> > address@hidden) at gmain.c:3630 +>>> > +>>> > #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +>>> > +>>> > #4 os_host_main_loop_wait (timeout=<optimized out>) at +>>> > util/main-loop.c:258 +>>> > +>>> > #5 main_loop_wait (address@hidden) at +>>> > util/main-loop.c:506 +>>> > +>>> > #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +>>> > +>>> > #7 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized +>>> > out>) at vl.c:4709 +>>> > +>>> > (gdb) p ioc->features +>>> > +>>> > $1 = 6 +>>> > +>>> > (gdb) p ioc->name +>>> > +>>> > $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +>>> > +>>> > +>>> > May be socket_accept_incoming_migration should +>>> > call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +>>> > +>>> > +>>> > thank you. +>>> > +>>> > +>>> > +>>> > +>>> > +>>> > 原始邮件 +>>> > address@hidden +>>> > address@hidden +>>> > address@hidden@huawei.com> +>>> > *日 期 ：*2017年03月16日 14:46 +>>> > *主 题 ：**Re: [Qemu-devel] COLO failover hang* +>>> > +>>> > +>>> > +>>> > +>>> > On 03/15/2017 05:06 PM, wangguang wrote: +>>> > > am testing QEMU COLO feature described here [QEMU +>>> > > Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +>>> > > +>>> > > When the Primary Node panic,the Secondary Node qemu hang. +>>> > > hang at recvmsg in qio_channel_socket_readv. 
+>>> > > And I run { 'execute': 'nbd-server-stop' } and { "execute": +>>> > > "x-colo-lost-heartbeat" } in Secondary VM's +>>> > > monitor,the Secondary Node qemu still hang at recvmsg . +>>> > > +>>> > > I found that the colo in qemu is not complete yet. +>>> > > Do the colo have any plan for development? +>>> > +>>> > Yes, We are developing. You can see some of patch we pushing. +>>> > +>>> > > Has anyone ever run it successfully? Any help is appreciated! +>>> > +>>> > In our internal version can run it successfully, +>>> > The failover detail you can ask Zhanghailiang for help. +>>> > Next time if you have some question about COLO, +>>> > please cc me and zhanghailiang address@hidden +>>> > +>>> > +>>> > Thanks +>>> > Zhang Chen +>>> > +>>> > +>>> > > +>>> > > +>>> > > +>>> > > centos7.2+qemu2.7.50 +>>> > > (gdb) bt +>>> > > #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +>>> > > #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=<optimized out>, +>>> > > iov=<optimized out>, niov=<optimized out>, fds=0x0, nfds=0x0, errp=0x0) +at +>>> > > io/channel-socket.c:497 +>>> > > #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +>>> > > address@hidden "", address@hidden, +>>> > > address@hidden) at io/channel.c:97 +>>> > > #3 0x00007f3e032750e0 in channel_get_buffer (opaque=<optimized out>, +>>> > > buf=0x7f3e05910f38 "", pos=<optimized out>, size=32768) at +>>> > > migration/qemu-file-channel.c:78 +>>> > > #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +>>> > > migration/qemu-file.c:257 +>>> > > #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +>>> > > address@hidden) at migration/qemu-file.c:510 +>>> > > #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +>>> > > migration/qemu-file.c:523 +>>> > > 
#7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +>>> > > migration/qemu-file.c:603 +>>> > > #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +>>> > > address@hidden) at migration/colo.c:215 +>>> > > #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +>>> > > checkpoint_request=<synthetic pointer>, f=<optimized out>) at +>>> > > migration/colo.c:546 +>>> > > #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +>>> > > migration/colo.c:649 +>>> > > #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +>>> > > #12 0x00007f3dfc9c03ed in clone () from /lib64/libc..so.6 +>>> > > +>>> > > +>>> > > +>>> > > +>>> > > +>>> > > -- +>>> > > View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +>>> > > Sent from the Developer mailing list archive at Nabble.com. +>>> > > +>>> > > +>>> > > +>>> > > +>>> > +>>> > -- +>>> > Thanks +>>> > Zhang Chen +>>> > +>>> > +>>> > +>>> > +>>> > +>>> +>> +> -- +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> . +> + |