diff options
Diffstat (limited to 'classification_output/01/other/65781993')
| -rw-r--r-- | classification_output/01/other/65781993 | 2793 |
1 file changed, 0 insertions, 2793 deletions
diff --git a/classification_output/01/other/65781993 b/classification_output/01/other/65781993 deleted file mode 100644 index 6543b7f26..000000000 --- a/classification_output/01/other/65781993 +++ /dev/null @@ -1,2793 +0,0 @@ -other: 0.727 -instruction: 0.670 -semantic: 0.665 -mistranslation: 0.650 - -[Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang - -Thank youã - -I have test areadyã - -When the Primary Node panic,the Secondary Node qemu hang at the same placeã - -Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary Node qemu -will not produce the problem,but Primary Node panic canã - -I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. - - -when failover,channel_shutdown could not shut down the channel. - - -so the colo_process_incoming_thread will hang at recvmsg. - - -I test a patch: - - -diff --git a/migration/socket.c b/migration/socket.c - - -index 13966f1..d65a0ea 100644 - - ---- a/migration/socket.c - - -+++ b/migration/socket.c - - -@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel -*ioc, - - - } - - - - - - trace_migration_socket_incoming_accepted() - - - - - - qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") - - -+ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) - - - migration_channel_process_incoming(migrate_get_current(), - - - QIO_CHANNEL(sioc)) - - - object_unref(OBJECT(sioc)) - - - - -My test will not hang any more. - - - - - - - - - - - - - - - - - -åå§é®ä»¶ - - - -åä»¶äººï¼ address@hidden -æ¶ä»¶äººï¼ç广10165992 address@hidden -æéäººï¼ address@hidden address@hidden -æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang - - - - - -Hi,Wang. - -You can test this branch: -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -and please follow wiki ensure your own configuration correctly. 
-http://wiki.qemu-project.org/Features/COLO -Thanks - -Zhang Chen - - -On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ -ï¼ hi. -ï¼ -ï¼ I test the git qemu master have the same problem. -ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ -ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ (address@hidden, address@hidden "", -ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ -ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ migration/qemu-file-channel.c:78 -ï¼ -ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ migration/qemu-file.c:295 -ï¼ -ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ -ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ migration/qemu-file.c:568 -ï¼ -ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ migration/qemu-file.c:648 -ï¼ -ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ address@hidden) at migration/colo.c:244 -ï¼ -ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ outï¼, address@hidden, -ï¼ address@hidden) -ï¼ -ï¼ at migration/colo.c:264 -ï¼ -ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ -ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ -ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ -ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ -ï¼ $3 = 0 -ï¼ -ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ -ï¼ #1 0x00007fdcc6966350 in 
g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ gmain.c:3054 -ï¼ -ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ address@hidden) at gmain.c:3630 -ï¼ -ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ -ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ util/main-loop.c:258 -ï¼ -ï¼ #5 main_loop_wait (address@hidden) at -ï¼ util/main-loop.c:506 -ï¼ -ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ -ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ outï¼) at vl.c:4709 -ï¼ -ï¼ (gdb) p ioc-ï¼features -ï¼ -ï¼ $1 = 6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ -ï¼ -ï¼ May be socket_accept_incoming_migration should -ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ -ï¼ -ï¼ thank you. -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ åå§é®ä»¶ -ï¼ address@hidden -ï¼ address@hidden -ï¼ address@hidden@huawei.comï¼ -ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ ï¼ -ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ ï¼ -ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ ï¼ Do the colo have any plan for development? -ï¼ -ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ -ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -ï¼ -ï¼ In our internal version can run it successfully, -ï¼ The failover detail you can ask Zhanghailiang for help. 
-ï¼ Next time if you have some question about COLO, -ï¼ please cc me and zhanghailiang address@hidden -ï¼ -ï¼ -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ ï¼ (gdb) bt -ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at -ï¼ ï¼ io/channel-socket.c:497 -ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ ï¼ address@hidden "", address@hidden, -ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ ï¼ migration/qemu-file.c:257 -ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:523 -ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:603 -ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ ï¼ address@hidden) at migration/colo.c:215 -ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ ï¼ migration/colo.c:546 -ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ ï¼ migration/colo.c:649 -ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -- -ï¼ ï¼ View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. 
-ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ -ï¼ -- -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ - --- -Thanks -Zhang Chen - -Hi, - -On 2017/3/21 16:10, address@hidden wrote: -Thank youã - -I have test areadyã - -When the Primary Node panic,the Secondary Node qemu hang at the same placeã - -Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary Node qemu -will not produce the problem,but Primary Node panic canã - -I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. -Yes, you are right, when we do failover for primary/secondary VM, we will -shutdown the related -fd in case it is stuck in the read/write fd. - -It seems that you didn't follow the above introduction exactly to do the test. -Could you -share your test procedures ? Especially the commands used in the test. - -Thanks, -Hailiang -when failover,channel_shutdown could not shut down the channel. - - -so the colo_process_incoming_thread will hang at recvmsg. - - -I test a patch: - - -diff --git a/migration/socket.c b/migration/socket.c - - -index 13966f1..d65a0ea 100644 - - ---- a/migration/socket.c - - -+++ b/migration/socket.c - - -@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel -*ioc, - - - } - - - - - - trace_migration_socket_incoming_accepted() - - - - - - qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") - - -+ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) - - - migration_channel_process_incoming(migrate_get_current(), - - - QIO_CHANNEL(sioc)) - - - object_unref(OBJECT(sioc)) - - - - -My test will not hang any more. - - - - - - - - - - - - - - - - - -åå§é®ä»¶ - - - -åä»¶äººï¼ address@hidden -æ¶ä»¶äººï¼ç广10165992 address@hidden -æéäººï¼ address@hidden address@hidden -æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang - - - - - -Hi,Wang. 
- -You can test this branch: -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -and please follow wiki ensure your own configuration correctly. -http://wiki.qemu-project.org/Features/COLO -Thanks - -Zhang Chen - - -On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ -ï¼ hi. -ï¼ -ï¼ I test the git qemu master have the same problem. -ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ -ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ (address@hidden, address@hidden "", -ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ -ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ migration/qemu-file-channel.c:78 -ï¼ -ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ migration/qemu-file.c:295 -ï¼ -ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ -ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ migration/qemu-file.c:568 -ï¼ -ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ migration/qemu-file.c:648 -ï¼ -ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ address@hidden) at migration/colo.c:244 -ï¼ -ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ outï¼, address@hidden, -ï¼ address@hidden) -ï¼ -ï¼ at migration/colo.c:264 -ï¼ -ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ -ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ -ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ -ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ -ï¼ $3 = 0 -ï¼ 
-ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ -ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ gmain.c:3054 -ï¼ -ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ address@hidden) at gmain.c:3630 -ï¼ -ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ -ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ util/main-loop.c:258 -ï¼ -ï¼ #5 main_loop_wait (address@hidden) at -ï¼ util/main-loop.c:506 -ï¼ -ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ -ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ outï¼) at vl.c:4709 -ï¼ -ï¼ (gdb) p ioc-ï¼features -ï¼ -ï¼ $1 = 6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ -ï¼ -ï¼ May be socket_accept_incoming_migration should -ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ -ï¼ -ï¼ thank you. -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ åå§é®ä»¶ -ï¼ address@hidden -ï¼ address@hidden -ï¼ address@hidden@huawei.comï¼ -ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ ï¼ -ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ ï¼ -ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ ï¼ Do the colo have any plan for development? -ï¼ -ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ -ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! 
-ï¼ -ï¼ In our internal version can run it successfully, -ï¼ The failover detail you can ask Zhanghailiang for help. -ï¼ Next time if you have some question about COLO, -ï¼ please cc me and zhanghailiang address@hidden -ï¼ -ï¼ -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ ï¼ (gdb) bt -ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at -ï¼ ï¼ io/channel-socket.c:497 -ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ ï¼ address@hidden "", address@hidden, -ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ ï¼ migration/qemu-file.c:257 -ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:523 -ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:603 -ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ ï¼ address@hidden) at migration/colo.c:215 -ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ ï¼ migration/colo.c:546 -ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ ï¼ migration/colo.c:649 -ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -- -ï¼ ï¼ View this message in context: 
-http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ -ï¼ -- -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ - -Hi, - -Thanks for reporting this, and i confirmed it in my test, and it is a bug. - -Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -case COLO thread/incoming thread is stuck in read/write() while do failover, -but it didn't take effect, because all the fd used by COLO (also migration) -has been wrapped by qio channel, and it will not call the shutdown API if -we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN). - -Cc: Dr. David Alan Gilbert <address@hidden> - -I doubted migration cancel has the same problem, it may be stuck in write() -if we tried to cancel migration. - -void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error -**errp) -{ - qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); - migration_channel_connect(s, ioc, NULL); - ... ... -We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -and the -migrate_fd_cancel() -{ - ... ... - if (s->state == MIGRATION_STATUS_CANCELLING && f) { - qemu_file_shutdown(f); --> This will not take effect. No ? - } -} - -Thanks, -Hailiang - -On 2017/3/21 16:10, address@hidden wrote: -Thank youã - -I have test areadyã - -When the Primary Node panic,the Secondary Node qemu hang at the same placeã - -Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary Node qemu -will not produce the problem,but Primary Node panic canã - -I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. - - -when failover,channel_shutdown could not shut down the channel. - - -so the colo_process_incoming_thread will hang at recvmsg. 
- - -I test a patch: - - -diff --git a/migration/socket.c b/migration/socket.c - - -index 13966f1..d65a0ea 100644 - - ---- a/migration/socket.c - - -+++ b/migration/socket.c - - -@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel -*ioc, - - - } - - - - - - trace_migration_socket_incoming_accepted() - - - - - - qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") - - -+ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) - - - migration_channel_process_incoming(migrate_get_current(), - - - QIO_CHANNEL(sioc)) - - - object_unref(OBJECT(sioc)) - - - - -My test will not hang any more. - - - - - - - - - - - - - - - - - -åå§é®ä»¶ - - - -åä»¶äººï¼ address@hidden -æ¶ä»¶äººï¼ç广10165992 address@hidden -æéäººï¼ address@hidden address@hidden -æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang - - - - - -Hi,Wang. - -You can test this branch: -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -and please follow wiki ensure your own configuration correctly. -http://wiki.qemu-project.org/Features/COLO -Thanks - -Zhang Chen - - -On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ -ï¼ hi. -ï¼ -ï¼ I test the git qemu master have the same problem. 
-ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ -ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ (address@hidden, address@hidden "", -ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ -ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ migration/qemu-file-channel.c:78 -ï¼ -ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ migration/qemu-file.c:295 -ï¼ -ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ -ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ migration/qemu-file.c:568 -ï¼ -ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ migration/qemu-file.c:648 -ï¼ -ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ address@hidden) at migration/colo.c:244 -ï¼ -ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ outï¼, address@hidden, -ï¼ address@hidden) -ï¼ -ï¼ at migration/colo.c:264 -ï¼ -ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ -ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ -ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ -ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ -ï¼ $3 = 0 -ï¼ -ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ -ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ gmain.c:3054 -ï¼ -ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ address@hidden) at gmain.c:3630 -ï¼ -ï¼ #3 
0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ -ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ util/main-loop.c:258 -ï¼ -ï¼ #5 main_loop_wait (address@hidden) at -ï¼ util/main-loop.c:506 -ï¼ -ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ -ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ outï¼) at vl.c:4709 -ï¼ -ï¼ (gdb) p ioc-ï¼features -ï¼ -ï¼ $1 = 6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ -ï¼ -ï¼ May be socket_accept_incoming_migration should -ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ -ï¼ -ï¼ thank you. -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ åå§é®ä»¶ -ï¼ address@hidden -ï¼ address@hidden -ï¼ address@hidden@huawei.comï¼ -ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ ï¼ -ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ ï¼ -ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ ï¼ Do the colo have any plan for development? -ï¼ -ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ -ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -ï¼ -ï¼ In our internal version can run it successfully, -ï¼ The failover detail you can ask Zhanghailiang for help. 
-ï¼ Next time if you have some question about COLO, -ï¼ please cc me and zhanghailiang address@hidden -ï¼ -ï¼ -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ ï¼ (gdb) bt -ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at -ï¼ ï¼ io/channel-socket.c:497 -ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ ï¼ address@hidden "", address@hidden, -ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ ï¼ migration/qemu-file.c:257 -ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:523 -ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:603 -ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ ï¼ address@hidden) at migration/colo.c:215 -ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ ï¼ migration/colo.c:546 -ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ ï¼ migration/colo.c:649 -ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -- -ï¼ ï¼ View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. 
-ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ -ï¼ -- -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ - -* Hailiang Zhang (address@hidden) wrote: -> -Hi, -> -> -Thanks for reporting this, and i confirmed it in my test, and it is a bug. -> -> -Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -> -case COLO thread/incoming thread is stuck in read/write() while do failover, -> -but it didn't take effect, because all the fd used by COLO (also migration) -> -has been wrapped by qio channel, and it will not call the shutdown API if -> -we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -> -QIO_CHANNEL_FEATURE_SHUTDOWN). -> -> -Cc: Dr. David Alan Gilbert <address@hidden> -> -> -I doubted migration cancel has the same problem, it may be stuck in write() -> -if we tried to cancel migration. -> -> -void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error -> -**errp) -> -{ -> -qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); -> -migration_channel_connect(s, ioc, NULL); -> -... ... -> -We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -> -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -> -and the -> -migrate_fd_cancel() -> -{ -> -... ... -> -if (s->state == MIGRATION_STATUS_CANCELLING && f) { -> -qemu_file_shutdown(f); --> This will not take effect. No ? -> -} -> -} -(cc'd in Daniel Berrange). -I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); -at the -top of qio_channel_socket_new; so I think that's safe isn't it? - -Dave - -> -Thanks, -> -Hailiang -> -> -On 2017/3/21 16:10, address@hidden wrote: -> -> Thank youã -> -> -> -> I have test areadyã -> -> -> -> When the Primary Node panic,the Secondary Node qemu hang at the same placeã -> -> -> -> Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary Node -> -> qemu will not produce the problem,but Primary Node panic canã -> -> -> -> I think due to the feature of channel does not support -> -> QIO_CHANNEL_FEATURE_SHUTDOWN. 
-> -> -> -> -> -> when failover,channel_shutdown could not shut down the channel. -> -> -> -> -> -> so the colo_process_incoming_thread will hang at recvmsg. -> -> -> -> -> -> I test a patch: -> -> -> -> -> -> diff --git a/migration/socket.c b/migration/socket.c -> -> -> -> -> -> index 13966f1..d65a0ea 100644 -> -> -> -> -> -> --- a/migration/socket.c -> -> -> -> -> -> +++ b/migration/socket.c -> -> -> -> -> -> @@ -147,8 +147,9 @@ static gboolean -> -> socket_accept_incoming_migration(QIOChannel *ioc, -> -> -> -> -> -> } -> -> -> -> -> -> -> -> -> -> -> -> trace_migration_socket_incoming_accepted() -> -> -> -> -> -> -> -> -> -> -> -> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") -> -> -> -> -> -> + qio_channel_set_feature(QIO_CHANNEL(sioc), -> -> QIO_CHANNEL_FEATURE_SHUTDOWN) -> -> -> -> -> -> migration_channel_process_incoming(migrate_get_current(), -> -> -> -> -> -> QIO_CHANNEL(sioc)) -> -> -> -> -> -> object_unref(OBJECT(sioc)) -> -> -> -> -> -> -> -> -> -> My test will not hang any more. -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> -> åå§é®ä»¶ -> -> -> -> -> -> -> -> åä»¶äººï¼ address@hidden -> -> æ¶ä»¶äººï¼ç广10165992 address@hidden -> -> æéäººï¼ address@hidden address@hidden -> -> æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -> -> 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang -> -> -> -> -> -> -> -> -> -> -> -> Hi,Wang. -> -> -> -> You can test this branch: -> -> -> -> -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -> -> -> -> and please follow wiki ensure your own configuration correctly. -> -> -> -> -http://wiki.qemu-project.org/Features/COLO -> -> -> -> -> -> Thanks -> -> -> -> Zhang Chen -> -> -> -> -> -> On 03/21/2017 03:27 PM, address@hidden wrote: -> -> ï¼ -> -> ï¼ hi. -> -> ï¼ -> -> ï¼ I test the git qemu master have the same problem. 
-> -> ï¼ -> -> ï¼ (gdb) bt -> -> ï¼ -> -> ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -> -> ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -> -> ï¼ -> -> ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -> -> ï¼ (address@hidden, address@hidden "", -> -> ï¼ address@hidden, address@hidden) at io/channel.c:114 -> -> ï¼ -> -> ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -> -> ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -> -> ï¼ migration/qemu-file-channel.c:78 -> -> ï¼ -> -> ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -> -> ï¼ migration/qemu-file.c:295 -> -> ï¼ -> -> ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -> -> ï¼ address@hidden) at migration/qemu-file.c:555 -> -> ï¼ -> -> ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -> -> ï¼ migration/qemu-file.c:568 -> -> ï¼ -> -> ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -> -> ï¼ migration/qemu-file.c:648 -> -> ï¼ -> -> ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -> -> ï¼ address@hidden) at migration/colo.c:244 -> -> ï¼ -> -> ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -> -> ï¼ outï¼, address@hidden, -> -> ï¼ address@hidden) -> -> ï¼ -> -> ï¼ at migration/colo.c:264 -> -> ï¼ -> -> ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -> -> ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -> -> ï¼ -> -> ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -> -> ï¼ -> -> ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -> -> ï¼ -> -> ï¼ (gdb) p ioc-ï¼name -> -> ï¼ -> -> ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -> -> ï¼ -> -> ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -> -> ï¼ -> -> ï¼ $3 = 0 -> -> ï¼ -> -> ï¼ -> -> ï¼ (gdb) bt -> -> ï¼ -> -> ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -> -> ï¼ 
condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -> -> ï¼ -> -> ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -> -> ï¼ gmain.c:3054 -> -> ï¼ -> -> ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -> -> ï¼ address@hidden) at gmain.c:3630 -> -> ï¼ -> -> ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -> -> ï¼ -> -> ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -> -> ï¼ util/main-loop.c:258 -> -> ï¼ -> -> ï¼ #5 main_loop_wait (address@hidden) at -> -> ï¼ util/main-loop.c:506 -> -> ï¼ -> -> ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -> -> ï¼ -> -> ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -> -> ï¼ outï¼) at vl.c:4709 -> -> ï¼ -> -> ï¼ (gdb) p ioc-ï¼features -> -> ï¼ -> -> ï¼ $1 = 6 -> -> ï¼ -> -> ï¼ (gdb) p ioc-ï¼name -> -> ï¼ -> -> ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -> -> ï¼ -> -> ï¼ -> -> ï¼ May be socket_accept_incoming_migration should -> -> ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -> -> ï¼ -> -> ï¼ -> -> ï¼ thank you. -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ åå§é®ä»¶ -> -> ï¼ address@hidden -> -> ï¼ address@hidden -> -> ï¼ address@hidden@huawei.comï¼ -> -> ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -> -> ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -> -> ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -> -> ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -> -> ï¼ ï¼ -> -> ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -> -> ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -> -> ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -> -> ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -> -> ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -> -> ï¼ ï¼ -> -> ï¼ ï¼ I found that the colo in qemu is not complete yet. 
-> -> ï¼ ï¼ Do the colo have any plan for development? -> -> ï¼ -> -> ï¼ Yes, We are developing. You can see some of patch we pushing. -> -> ï¼ -> -> ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -> -> ï¼ -> -> ï¼ In our internal version can run it successfully, -> -> ï¼ The failover detail you can ask Zhanghailiang for help. -> -> ï¼ Next time if you have some question about COLO, -> -> ï¼ please cc me and zhanghailiang address@hidden -> -> ï¼ -> -> ï¼ -> -> ï¼ Thanks -> -> ï¼ Zhang Chen -> -> ï¼ -> -> ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ centos7.2+qemu2.7.50 -> -> ï¼ ï¼ (gdb) bt -> -> ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -> -> ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -> -> ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) -> -> at -> -> ï¼ ï¼ io/channel-socket.c:497 -> -> ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -> -> ï¼ ï¼ address@hidden "", address@hidden, -> -> ï¼ ï¼ address@hidden) at io/channel.c:97 -> -> ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -> -> ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -> -> ï¼ ï¼ migration/qemu-file-channel.c:78 -> -> ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -> -> ï¼ ï¼ migration/qemu-file.c:257 -> -> ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -> -> ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -> -> ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -> -> ï¼ ï¼ migration/qemu-file.c:523 -> -> ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -> -> ï¼ ï¼ migration/qemu-file.c:603 -> -> ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -> -> ï¼ ï¼ address@hidden) at migration/colo.c:215 -> -> ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -> -> ï¼ ï¼ checkpoint_request=ï¼synthetic 
pointerï¼, f=ï¼optimized outï¼) at -> -> ï¼ ï¼ migration/colo.c:546 -> -> ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -> -> ï¼ ï¼ migration/colo.c:649 -> -> ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -> -> ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -- -> -> ï¼ ï¼ View this message in context: -> -> -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -> -> ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ ï¼ -> -> ï¼ -> -> ï¼ -- -> -> ï¼ Thanks -> -> ï¼ Zhang Chen -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> ï¼ -> -> -> --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - -On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: -* Hailiang Zhang (address@hidden) wrote: -Hi, - -Thanks for reporting this, and i confirmed it in my test, and it is a bug. - -Though we tried to call qemu_file_shutdown() to shutdown the related fd, in -case COLO thread/incoming thread is stuck in read/write() while do failover, -but it didn't take effect, because all the fd used by COLO (also migration) -has been wrapped by qio channel, and it will not call the shutdown API if -we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN). - -Cc: Dr. David Alan Gilbert <address@hidden> - -I doubted migration cancel has the same problem, it may be stuck in write() -if we tried to cancel migration. - -void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error -**errp) -{ - qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); - migration_channel_connect(s, ioc, NULL); - ... ... -We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -QIO_CHANNEL_FEATURE_SHUTDOWN) above, -and the -migrate_fd_cancel() -{ - ... ... - if (s->state == MIGRATION_STATUS_CANCELLING && f) { - qemu_file_shutdown(f); --> This will not take effect. 
No ? - } -} -(cc'd in Daniel Berrange). -I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); -at the -top of qio_channel_socket_new; so I think that's safe isn't it? -Hmm, you are right, this problem is only exist for the migration incoming fd, -thanks. -Dave -Thanks, -Hailiang - -On 2017/3/21 16:10, address@hidden wrote: -Thank you。 - -I have test aready。 - -When the Primary Node panic,the Secondary Node qemu hang at the same place。 - -Incorrding -http://wiki.qemu-project.org/Features/COLO -，kill Primary Node qemu -will not produce the problem,but Primary Node panic can。 - -I think due to the feature of channel does not support -QIO_CHANNEL_FEATURE_SHUTDOWN. - - -when failover,channel_shutdown could not shut down the channel. - - -so the colo_process_incoming_thread will hang at recvmsg. - - -I test a patch: - - -diff --git a/migration/socket.c b/migration/socket.c - - -index 13966f1..d65a0ea 100644 - - ---- a/migration/socket.c - - -+++ b/migration/socket.c - - -@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel -*ioc, - - - } - - - - - - trace_migration_socket_incoming_accepted() - - - - - - qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") - - -+ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) - - - migration_channel_process_incoming(migrate_get_current(), - - - QIO_CHANNEL(sioc)) - - - object_unref(OBJECT(sioc)) - - - - -My test will not hang any more. - - - - - - - - - - - - - - - - - -原始邮件 - - - -发件人： address@hidden -收件人：王广10165992 address@hidden -抄送人： address@hidden address@hidden -日 期 ：2017年03月21日 15:58 -主 题 ：Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang - - - - - -Hi,Wang. - -You can test this branch: -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -and please follow wiki ensure your own configuration correctly.
-http://wiki.qemu-project.org/Features/COLO -Thanks - -Zhang Chen - - -On 03/21/2017 03:27 PM, address@hidden wrote: -ï¼ -ï¼ hi. -ï¼ -ï¼ I test the git qemu master have the same problem. -ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -ï¼ -ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -ï¼ (address@hidden, address@hidden "", -ï¼ address@hidden, address@hidden) at io/channel.c:114 -ï¼ -ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ migration/qemu-file-channel.c:78 -ï¼ -ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -ï¼ migration/qemu-file.c:295 -ï¼ -ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -ï¼ address@hidden) at migration/qemu-file.c:555 -ï¼ -ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -ï¼ migration/qemu-file.c:568 -ï¼ -ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -ï¼ migration/qemu-file.c:648 -ï¼ -ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -ï¼ address@hidden) at migration/colo.c:244 -ï¼ -ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -ï¼ outï¼, address@hidden, -ï¼ address@hidden) -ï¼ -ï¼ at migration/colo.c:264 -ï¼ -ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -ï¼ -ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ -ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -ï¼ -ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN -ï¼ -ï¼ $3 = 0 -ï¼ -ï¼ -ï¼ (gdb) bt -ï¼ -ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -ï¼ -ï¼ #1 0x00007fdcc6966350 in 
g_main_dispatch (context=ï¼optimized outï¼) at -ï¼ gmain.c:3054 -ï¼ -ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -ï¼ address@hidden) at gmain.c:3630 -ï¼ -ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -ï¼ -ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -ï¼ util/main-loop.c:258 -ï¼ -ï¼ #5 main_loop_wait (address@hidden) at -ï¼ util/main-loop.c:506 -ï¼ -ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -ï¼ -ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -ï¼ outï¼) at vl.c:4709 -ï¼ -ï¼ (gdb) p ioc-ï¼features -ï¼ -ï¼ $1 = 6 -ï¼ -ï¼ (gdb) p ioc-ï¼name -ï¼ -ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -ï¼ -ï¼ -ï¼ May be socket_accept_incoming_migration should -ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -ï¼ -ï¼ -ï¼ thank you. -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ åå§é®ä»¶ -ï¼ address@hidden -ï¼ address@hidden -ï¼ address@hidden@huawei.comï¼ -ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -ï¼ ï¼ -ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -ï¼ ï¼ -ï¼ ï¼ I found that the colo in qemu is not complete yet. -ï¼ ï¼ Do the colo have any plan for development? -ï¼ -ï¼ Yes, We are developing. You can see some of patch we pushing. -ï¼ -ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -ï¼ -ï¼ In our internal version can run it successfully, -ï¼ The failover detail you can ask Zhanghailiang for help. 
-ï¼ Next time if you have some question about COLO, -ï¼ please cc me and zhanghailiang address@hidden -ï¼ -ï¼ -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ centos7.2+qemu2.7.50 -ï¼ ï¼ (gdb) bt -ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, -ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at -ï¼ ï¼ io/channel-socket.c:497 -ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -ï¼ ï¼ address@hidden "", address@hidden, -ï¼ ï¼ address@hidden) at io/channel.c:97 -ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, -ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -ï¼ ï¼ migration/qemu-file-channel.c:78 -ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -ï¼ ï¼ migration/qemu-file.c:257 -ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:523 -ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -ï¼ ï¼ migration/qemu-file.c:603 -ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -ï¼ ï¼ address@hidden) at migration/colo.c:215 -ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, -ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -ï¼ ï¼ migration/colo.c:546 -ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -ï¼ ï¼ migration/colo.c:649 -ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 -ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -- -ï¼ ï¼ View this message in context: -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. 
-ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ ï¼ -ï¼ -ï¼ -- -ï¼ Thanks -ï¼ Zhang Chen -ï¼ -ï¼ -ï¼ -ï¼ -ï¼ --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - -. - -* Hailiang Zhang (address@hidden) wrote: -> -On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: -> -> * Hailiang Zhang (address@hidden) wrote: -> -> > Hi, -> -> > -> -> > Thanks for reporting this, and i confirmed it in my test, and it is a bug. -> -> > -> -> > Though we tried to call qemu_file_shutdown() to shutdown the related fd, -> -> > in -> -> > case COLO thread/incoming thread is stuck in read/write() while do -> -> > failover, -> -> > but it didn't take effect, because all the fd used by COLO (also -> -> > migration) -> -> > has been wrapped by qio channel, and it will not call the shutdown API if -> -> > we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), -> -> > QIO_CHANNEL_FEATURE_SHUTDOWN). -> -> > -> -> > Cc: Dr. David Alan Gilbert <address@hidden> -> -> > -> -> > I doubted migration cancel has the same problem, it may be stuck in -> -> > write() -> -> > if we tried to cancel migration. -> -> > -> -> > void fd_start_outgoing_migration(MigrationState *s, const char *fdname, -> -> > Error **errp) -> -> > { -> -> > qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); -> -> > migration_channel_connect(s, ioc, NULL); -> -> > ... ... -> -> > We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), -> -> > QIO_CHANNEL_FEATURE_SHUTDOWN) above, -> -> > and the -> -> > migrate_fd_cancel() -> -> > { -> -> > ... ... -> -> > if (s->state == MIGRATION_STATUS_CANCELLING && f) { -> -> > qemu_file_shutdown(f); --> This will not take effect. No ? -> -> > } -> -> > } -> -> -> -> (cc'd in Daniel Berrange). -> -> I see that we call qio_channel_set_feature(ioc, -> -> QIO_CHANNEL_FEATURE_SHUTDOWN); at the -> -> top of qio_channel_socket_new; so I think that's safe isn't it? -> -> -> -> -Hmm, you are right, this problem is only exist for the migration incoming fd, -> -thanks. 
-Yes, and I don't think we normally do a cancel on the incoming side of a -migration. - -Dave - -> -> Dave -> -> -> -> > Thanks, -> -> > Hailiang -> -> > -> -> > On 2017/3/21 16:10, address@hidden wrote: -> -> > > Thank youã -> -> > > -> -> > > I have test areadyã -> -> > > -> -> > > When the Primary Node panic,the Secondary Node qemu hang at the same -> -> > > placeã -> -> > > -> -> > > Incorrding -http://wiki.qemu-project.org/Features/COLO -ï¼kill Primary -> -> > > Node qemu will not produce the problem,but Primary Node panic canã -> -> > > -> -> > > I think due to the feature of channel does not support -> -> > > QIO_CHANNEL_FEATURE_SHUTDOWN. -> -> > > -> -> > > -> -> > > when failover,channel_shutdown could not shut down the channel. -> -> > > -> -> > > -> -> > > so the colo_process_incoming_thread will hang at recvmsg. -> -> > > -> -> > > -> -> > > I test a patch: -> -> > > -> -> > > -> -> > > diff --git a/migration/socket.c b/migration/socket.c -> -> > > -> -> > > -> -> > > index 13966f1..d65a0ea 100644 -> -> > > -> -> > > -> -> > > --- a/migration/socket.c -> -> > > -> -> > > -> -> > > +++ b/migration/socket.c -> -> > > -> -> > > -> -> > > @@ -147,8 +147,9 @@ static gboolean -> -> > > socket_accept_incoming_migration(QIOChannel *ioc, -> -> > > -> -> > > -> -> > > } -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > trace_migration_socket_incoming_accepted() -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > qio_channel_set_name(QIO_CHANNEL(sioc), -> -> > > "migration-socket-incoming") -> -> > > -> -> > > -> -> > > + qio_channel_set_feature(QIO_CHANNEL(sioc), -> -> > > QIO_CHANNEL_FEATURE_SHUTDOWN) -> -> > > -> -> > > -> -> > > migration_channel_process_incoming(migrate_get_current(), -> -> > > -> -> > > -> -> > > QIO_CHANNEL(sioc)) -> -> > > -> -> > > -> -> > > object_unref(OBJECT(sioc)) -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > My test will not hang any more. 
-> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > åå§é®ä»¶ -> -> > > -> -> > > -> -> > > -> -> > > åä»¶äººï¼ address@hidden -> -> > > æ¶ä»¶äººï¼ç广10165992 address@hidden -> -> > > æéäººï¼ address@hidden address@hidden -> -> > > æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 -> -> > > 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > -> -> > > Hi,Wang. -> -> > > -> -> > > You can test this branch: -> -> > > -> -> > > -https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk -> -> > > -> -> > > and please follow wiki ensure your own configuration correctly. -> -> > > -> -> > > -http://wiki.qemu-project.org/Features/COLO -> -> > > -> -> > > -> -> > > Thanks -> -> > > -> -> > > Zhang Chen -> -> > > -> -> > > -> -> > > On 03/21/2017 03:27 PM, address@hidden wrote: -> -> > > ï¼ -> -> > > ï¼ hi. -> -> > > ï¼ -> -> > > ï¼ I test the git qemu master have the same problem. 
-> -> > > ï¼ -> -> > > ï¼ (gdb) bt -> -> > > ï¼ -> -> > > ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, -> -> > > ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 -> -> > > ï¼ -> -> > > ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read -> -> > > ï¼ (address@hidden, address@hidden "", -> -> > > ï¼ address@hidden, address@hidden) at io/channel.c:114 -> -> > > ï¼ -> -> > > ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, -> -> > > ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at -> -> > > ï¼ migration/qemu-file-channel.c:78 -> -> > > ï¼ -> -> > > ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at -> -> > > ï¼ migration/qemu-file.c:295 -> -> > > ï¼ -> -> > > ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, -> -> > > ï¼ address@hidden) at migration/qemu-file.c:555 -> -> > > ï¼ -> -> > > ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at -> -> > > ï¼ migration/qemu-file.c:568 -> -> > > ï¼ -> -> > > ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at -> -> > > ï¼ migration/qemu-file.c:648 -> -> > > ï¼ -> -> > > ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, -> -> > > ï¼ address@hidden) at migration/colo.c:244 -> -> > > ï¼ -> -> > > ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized -> -> > > ï¼ outï¼, address@hidden, -> -> > > ï¼ address@hidden) -> -> > > ï¼ -> -> > > ï¼ at migration/colo.c:264 -> -> > > ï¼ -> -> > > ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread -> -> > > ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 -> -> > > ï¼ -> -> > > ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 -> -> > > ï¼ -> -> > > ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 -> -> > > ï¼ -> -> > > ï¼ (gdb) p ioc-ï¼name -> -> > > ï¼ -> -> > > ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" -> -> > > ï¼ -> -> > > ï¼ (gdb) p ioc-ï¼features Do not 
support -> -> > > QIO_CHANNEL_FEATURE_SHUTDOWN -> -> > > ï¼ -> -> > > ï¼ $3 = 0 -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ (gdb) bt -> -> > > ï¼ -> -> > > ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, -> -> > > ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 -> -> > > ï¼ -> -> > > ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at -> -> > > ï¼ gmain.c:3054 -> -> > > ï¼ -> -> > > ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, -> -> > > ï¼ address@hidden) at gmain.c:3630 -> -> > > ï¼ -> -> > > ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 -> -> > > ï¼ -> -> > > ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at -> -> > > ï¼ util/main-loop.c:258 -> -> > > ï¼ -> -> > > ï¼ #5 main_loop_wait (address@hidden) at -> -> > > ï¼ util/main-loop.c:506 -> -> > > ï¼ -> -> > > ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 -> -> > > ï¼ -> -> > > ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized -> -> > > ï¼ outï¼) at vl.c:4709 -> -> > > ï¼ -> -> > > ï¼ (gdb) p ioc-ï¼features -> -> > > ï¼ -> -> > > ï¼ $1 = 6 -> -> > > ï¼ -> -> > > ï¼ (gdb) p ioc-ï¼name -> -> > > ï¼ -> -> > > ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ May be socket_accept_incoming_migration should -> -> > > ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ thank you. 
-> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ åå§é®ä»¶ -> -> > > ï¼ address@hidden -> -> > > ï¼ address@hidden -> -> > > ï¼ address@hidden@huawei.comï¼ -> -> > > ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 -> -> > > ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ On 03/15/2017 05:06 PM, wangguang wrote: -> -> > > ï¼ ï¼ am testing QEMU COLO feature described here [QEMU -> -> > > ï¼ ï¼ Wiki]( -http://wiki.qemu-project.org/Features/COLO -). -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. -> -> > > ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. -> -> > > ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": -> -> > > ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's -> -> > > ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ I found that the colo in qemu is not complete yet. -> -> > > ï¼ ï¼ Do the colo have any plan for development? -> -> > > ï¼ -> -> > > ï¼ Yes, We are developing. You can see some of patch we pushing. -> -> > > ï¼ -> -> > > ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! -> -> > > ï¼ -> -> > > ï¼ In our internal version can run it successfully, -> -> > > ï¼ The failover detail you can ask Zhanghailiang for help. 
-> -> > > ï¼ Next time if you have some question about COLO, -> -> > > ï¼ please cc me and zhanghailiang address@hidden -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ Thanks -> -> > > ï¼ Zhang Chen -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ centos7.2+qemu2.7.50 -> -> > > ï¼ ï¼ (gdb) bt -> -> > > ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 -> -> > > ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized -> -> > > outï¼, -> -> > > ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, -> -> > > errp=0x0) at -> -> > > ï¼ ï¼ io/channel-socket.c:497 -> -> > > ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, -> -> > > ï¼ ï¼ address@hidden "", address@hidden, -> -> > > ï¼ ï¼ address@hidden) at io/channel.c:97 -> -> > > ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized -> -> > > outï¼, -> -> > > ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at -> -> > > ï¼ ï¼ migration/qemu-file-channel.c:78 -> -> > > ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at -> -> > > ï¼ ï¼ migration/qemu-file.c:257 -> -> > > ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, -> -> > > ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 -> -> > > ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at -> -> > > ï¼ ï¼ migration/qemu-file.c:523 -> -> > > ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at -> -> > > ï¼ ï¼ migration/qemu-file.c:603 -> -> > > ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, -> -> > > ï¼ ï¼ address@hidden) at migration/colo.c:215 -> -> > > ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message -> -> > > (errp=0x7f3d62bfaa48, -> -> > > ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at -> -> > > ï¼ ï¼ migration/colo.c:546 -> -> > > ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at -> -> > > ï¼ ï¼ 
migration/colo.c:649 -> -> > > ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from -> -> > > /lib64/libpthread.so.0 -> -> > > ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -- -> -> > > ï¼ ï¼ View this message in context: -> -> > > -http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html -> -> > > ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ ï¼ -> -> > > ï¼ -> -> > > ï¼ -- -> -> > > ï¼ Thanks -> -> > > ï¼ Zhang Chen -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > ï¼ -> -> > > -> -> > -> -> -- -> -> Dr. David Alan Gilbert / address@hidden / Manchester, UK -> -> -> -> . -> -> -> --- -Dr. David Alan Gilbert / address@hidden / Manchester, UK - |