diff options
Diffstat (limited to 'results/classifier/zero-shot/013/risc-v/65781993')
| -rw-r--r-- | results/classifier/zero-shot/013/risc-v/65781993 | 2821 |
1 files changed, 2821 insertions, 0 deletions
diff --git a/results/classifier/zero-shot/013/risc-v/65781993 b/results/classifier/zero-shot/013/risc-v/65781993 new file mode 100644 index 000000000..2a66aa6c4 --- /dev/null +++ b/results/classifier/zero-shot/013/risc-v/65781993 @@ -0,0 +1,2821 @@ +risc-v: 0.745 +user-level: 0.697 +PID: 0.673 +debug: 0.673 +arm: 0.672 +virtual: 0.670 +assembly: 0.666 +semantic: 0.665 +graphic: 0.664 +alpha: 0.662 +socket: 0.660 +operating system: 0.660 +register: 0.659 +permissions: 0.658 +architecture: 0.658 +network: 0.657 +files: 0.657 +kernel: 0.656 +mistranslation: 0.650 +device: 0.647 +performance: 0.636 +boot: 0.635 +KVM: 0.627 +system: 0.624 +peripherals: 0.624 +i386: 0.612 +VMM: 0.612 +TCG: 0.607 +vnc: 0.590 +hypervisor: 0.586 +x86: 0.579 +ppc: 0.557 + +[Qemu-devel] 答复: Re: 答复: Re: [BUG]COLO failover hang + +Thank youã + +I have test areadyã + +When the Primary Node panic,the Secondary Node qemu hang at the same placeã + +Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node qemu +will not produce the problem,but Primary Node panic canã + +I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. + + +when failover,channel_shutdown could not shut down the channel. + + +so the colo_process_incoming_thread will hang at recvmsg. + + +I test a patch: + + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +My test will not hang any more. 
+ + + + + + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ç广10165992 address@hidden +æéäººï¼ address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang + + + + + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. +http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ +ï¼ hi. +ï¼ +ï¼ I test the git qemu master have the same problem. +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ +ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ (address@hidden, address@hidden "", +ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ +ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ migration/qemu-file-channel.c:78 +ï¼ +ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ migration/qemu-file.c:295 +ï¼ +ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ +ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ migration/qemu-file.c:568 +ï¼ +ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ migration/qemu-file.c:648 +ï¼ +ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ address@hidden) at migration/colo.c:244 +ï¼ +ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ outï¼, address@hidden, +ï¼ address@hidden) +ï¼ +ï¼ at migration/colo.c:264 +ï¼ +ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ +ï¼ #10 0x00007f658be09df3 in start_thread () from 
/lib64/libpthread.so.0 +ï¼ +ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ +ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ +ï¼ $3 = 0 +ï¼ +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ +ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ gmain.c:3054 +ï¼ +ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ address@hidden) at gmain.c:3630 +ï¼ +ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ +ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ util/main-loop.c:258 +ï¼ +ï¼ #5 main_loop_wait (address@hidden) at +ï¼ util/main-loop.c:506 +ï¼ +ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ +ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ outï¼) at vl.c:4709 +ï¼ +ï¼ (gdb) p ioc-ï¼features +ï¼ +ï¼ $1 = 6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ +ï¼ +ï¼ May be socket_accept_incoming_migration should +ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ +ï¼ +ï¼ thank you. +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ åå§é®ä»¶ +ï¼ address@hidden +ï¼ address@hidden +ï¼ address@hidden@huawei.comï¼ +ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ +ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . 
+ï¼ ï¼ +ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ Do the colo have any plan for development? +ï¼ +ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ +ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ +ï¼ In our internal version can run it successfully, +ï¼ The failover detail you can ask Zhanghailiang for help. +ï¼ Next time if you have some question about COLO, +ï¼ please cc me and zhanghailiang address@hidden +ï¼ +ï¼ +ï¼ Thanks +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ (gdb) bt +ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ 
migration/colo.c:649 +＞ ＞ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +＞ ＞ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ -- +＞ ＞ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +＞ ＞ Sent from the Developer mailing list archive at Nabble.com. +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ +＞ +＞ -- +＞ Thanks +＞ Zhang Chen +＞ +＞ +＞ +＞ +＞ + +-- +Thanks +Zhang Chen + +Hi, + +On 2017/3/21 16:10, address@hidden wrote: +Thank you。 + +I have test aready。 + +When the Primary Node panic,the Secondary Node qemu hang at the same place。 + +Incorrding +http://wiki.qemu-project.org/Features/COLO +，kill Primary Node qemu +will not produce the problem,but Primary Node panic can。 + +I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. +Yes, you are right, when we do failover for primary/secondary VM, we will +shutdown the related +fd in case it is stuck in the read/write fd. + +It seems that you didn't follow the above introduction exactly to do the test. +Could you +share your test procedures ? Especially the commands used in the test. + +Thanks, +Hailiang +when failover,channel_shutdown could not shut down the channel. + + +so the colo_process_incoming_thread will hang at recvmsg. + + +I test a patch: + + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +My test will not hang any more. 
+ + + + + + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ç广10165992 address@hidden +æéäººï¼ address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang + + + + + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. +http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ +ï¼ hi. +ï¼ +ï¼ I test the git qemu master have the same problem. +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ +ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ (address@hidden, address@hidden "", +ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ +ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ migration/qemu-file-channel.c:78 +ï¼ +ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ migration/qemu-file.c:295 +ï¼ +ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ +ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ migration/qemu-file.c:568 +ï¼ +ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ migration/qemu-file.c:648 +ï¼ +ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ address@hidden) at migration/colo.c:244 +ï¼ +ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ outï¼, address@hidden, +ï¼ address@hidden) +ï¼ +ï¼ at migration/colo.c:264 +ï¼ +ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ +ï¼ #10 0x00007f658be09df3 in start_thread () from 
/lib64/libpthread.so.0 +ï¼ +ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ +ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ +ï¼ $3 = 0 +ï¼ +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ +ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ gmain.c:3054 +ï¼ +ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ address@hidden) at gmain.c:3630 +ï¼ +ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ +ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ util/main-loop.c:258 +ï¼ +ï¼ #5 main_loop_wait (address@hidden) at +ï¼ util/main-loop.c:506 +ï¼ +ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ +ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ outï¼) at vl.c:4709 +ï¼ +ï¼ (gdb) p ioc-ï¼features +ï¼ +ï¼ $1 = 6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ +ï¼ +ï¼ May be socket_accept_incoming_migration should +ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ +ï¼ +ï¼ thank you. +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ åå§é®ä»¶ +ï¼ address@hidden +ï¼ address@hidden +ï¼ address@hidden@huawei.comï¼ +ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ +ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . 
+ï¼ ï¼ +ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ Do the colo have any plan for development? +ï¼ +ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ +ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ +ï¼ In our internal version can run it successfully, +ï¼ The failover detail you can ask Zhanghailiang for help. +ï¼ Next time if you have some question about COLO, +ï¼ please cc me and zhanghailiang address@hidden +ï¼ +ï¼ +ï¼ Thanks +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ (gdb) bt +ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ 
migration/colo.c:649 +＞ ＞ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +＞ ＞ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ -- +＞ ＞ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +＞ ＞ Sent from the Developer mailing list archive at Nabble.com. +＞ ＞ +＞ ＞ +＞ ＞ +＞ ＞ +＞ +＞ -- +＞ Thanks +＞ Zhang Chen +＞ +＞ +＞ +＞ +＞ + +Hi, + +Thanks for reporting this, and i confirmed it in my test, and it is a bug. + +Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +case COLO thread/incoming thread is stuck in read/write() while do failover, +but it didn't take effect, because all the fd used by COLO (also migration) +has been wrapped by qio channel, and it will not call the shutdown API if +we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). + +Cc: Dr. David Alan Gilbert <address@hidden> + +I doubted migration cancel has the same problem, it may be stuck in write() +if we tried to cancel migration. + +void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error +**errp) +{ + qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); + migration_channel_connect(s, ioc, NULL); + ... ... +We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +and the +migrate_fd_cancel() +{ + ... ... + if (s->state == MIGRATION_STATUS_CANCELLING && f) { + qemu_file_shutdown(f); --> This will not take effect. No ? + } +} + +Thanks, +Hailiang + +On 2017/3/21 16:10, address@hidden wrote: +Thank you。 + +I have test aready。 + +When the Primary Node panic,the Secondary Node qemu hang at the same place。 + +Incorrding +http://wiki.qemu-project.org/Features/COLO +，kill Primary Node qemu +will not produce the problem,but Primary Node panic can。 + +I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. 
+ + +when failover,channel_shutdown could not shut down the channel. + + +so the colo_process_incoming_thread will hang at recvmsg. + + +I test a patch: + + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +My test will not hang any more. + + + + + + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ç广10165992 address@hidden +æéäººï¼ address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang + + + + + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. +http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ +ï¼ hi. +ï¼ +ï¼ I test the git qemu master have the same problem. 
+ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ +ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ (address@hidden, address@hidden "", +ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ +ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ migration/qemu-file-channel.c:78 +ï¼ +ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ migration/qemu-file.c:295 +ï¼ +ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ +ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ migration/qemu-file.c:568 +ï¼ +ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ migration/qemu-file.c:648 +ï¼ +ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ address@hidden) at migration/colo.c:244 +ï¼ +ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ outï¼, address@hidden, +ï¼ address@hidden) +ï¼ +ï¼ at migration/colo.c:264 +ï¼ +ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ +ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ +ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ +ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ +ï¼ $3 = 0 +ï¼ +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ +ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ gmain.c:3054 +ï¼ +ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ address@hidden) at gmain.c:3630 +ï¼ +ï¼ #3 
0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ +ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ util/main-loop.c:258 +ï¼ +ï¼ #5 main_loop_wait (address@hidden) at +ï¼ util/main-loop.c:506 +ï¼ +ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ +ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ outï¼) at vl.c:4709 +ï¼ +ï¼ (gdb) p ioc-ï¼features +ï¼ +ï¼ $1 = 6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ +ï¼ +ï¼ May be socket_accept_incoming_migration should +ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ +ï¼ +ï¼ thank you. +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ åå§é®ä»¶ +ï¼ address@hidden +ï¼ address@hidden +ï¼ address@hidden@huawei.comï¼ +ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ +ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ ï¼ +ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ Do the colo have any plan for development? +ï¼ +ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ +ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ +ï¼ In our internal version can run it successfully, +ï¼ The failover detail you can ask Zhanghailiang for help. 
+ï¼ Next time if you have some question about COLO, +ï¼ please cc me and zhanghailiang address@hidden +ï¼ +ï¼ +ï¼ Thanks +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ (gdb) bt +ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ migration/colo.c:649 +ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ -- +ï¼ ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. 
+ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ +ï¼ -- +ï¼ Thanks +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ + +* Hailiang Zhang (address@hidden) wrote: +> +Hi, +> +> +Thanks for reporting this, and i confirmed it in my test, and it is a bug. +> +> +Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +> +case COLO thread/incoming thread is stuck in read/write() while do failover, +> +but it didn't take effect, because all the fd used by COLO (also migration) +> +has been wrapped by qio channel, and it will not call the shutdown API if +> +we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +> +QIO_CHANNEL_FEATURE_SHUTDOWN). +> +> +Cc: Dr. David Alan Gilbert <address@hidden> +> +> +I doubted migration cancel has the same problem, it may be stuck in write() +> +if we tried to cancel migration. +> +> +void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error +> +**errp) +> +{ +> +qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); +> +migration_channel_connect(s, ioc, NULL); +> +... ... +> +We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +> +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +> +and the +> +migrate_fd_cancel() +> +{ +> +... ... +> +if (s->state == MIGRATION_STATUS_CANCELLING && f) { +> +qemu_file_shutdown(f); --> This will not take effect. No ? +> +} +> +} +(cc'd in Daniel Berrange). +I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +at the +top of qio_channel_socket_new; so I think that's safe isn't it? + +Dave + +> +Thanks, +> +Hailiang +> +> +On 2017/3/21 16:10, address@hidden wrote: +> +> Thank youã +> +> +> +> I have test areadyã +> +> +> +> When the Primary Node panic,the Secondary Node qemu hang at the same placeã +> +> +> +> Incorrding +http://wiki.qemu-project.org/Features/COLO +ï¼kill Primary Node +> +> qemu will not produce the problem,but Primary Node panic canã +> +> +> +> I think due to the feature of channel does not support +> +> QIO_CHANNEL_FEATURE_SHUTDOWN. 
+> +> +> +> +> +> when failover,channel_shutdown could not shut down the channel. +> +> +> +> +> +> so the colo_process_incoming_thread will hang at recvmsg. +> +> +> +> +> +> I test a patch: +> +> +> +> +> +> diff --git a/migration/socket.c b/migration/socket.c +> +> +> +> +> +> index 13966f1..d65a0ea 100644 +> +> +> +> +> +> --- a/migration/socket.c +> +> +> +> +> +> +++ b/migration/socket.c +> +> +> +> +> +> @@ -147,8 +147,9 @@ static gboolean +> +> socket_accept_incoming_migration(QIOChannel *ioc, +> +> +> +> +> +> } +> +> +> +> +> +> +> +> +> +> +> +> trace_migration_socket_incoming_accepted() +> +> +> +> +> +> +> +> +> +> +> +> qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") +> +> +> +> +> +> + qio_channel_set_feature(QIO_CHANNEL(sioc), +> +> QIO_CHANNEL_FEATURE_SHUTDOWN) +> +> +> +> +> +> migration_channel_process_incoming(migrate_get_current(), +> +> +> +> +> +> QIO_CHANNEL(sioc)) +> +> +> +> +> +> object_unref(OBJECT(sioc)) +> +> +> +> +> +> +> +> +> +> My test will not hang any more. +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> +> åå§é®ä»¶ +> +> +> +> +> +> +> +> åä»¶äººï¼ address@hidden +> +> æ¶ä»¶äººï¼ç广10165992 address@hidden +> +> æéäººï¼ address@hidden address@hidden +> +> æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +> +> 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +> +> +> +> +> +> +> +> +> +> +> +> Hi,Wang. +> +> +> +> You can test this branch: +> +> +> +> +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +> +> +> +> and please follow wiki ensure your own configuration correctly. +> +> +> +> +http://wiki.qemu-project.org/Features/COLO +> +> +> +> +> +> Thanks +> +> +> +> Zhang Chen +> +> +> +> +> +> On 03/21/2017 03:27 PM, address@hidden wrote: +> +> ï¼ +> +> ï¼ hi. +> +> ï¼ +> +> ï¼ I test the git qemu master have the same problem. 
+> +> ï¼ +> +> ï¼ (gdb) bt +> +> ï¼ +> +> ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +> +> ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +> +> ï¼ +> +> ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +> +> ï¼ (address@hidden, address@hidden "", +> +> ï¼ address@hidden, address@hidden) at io/channel.c:114 +> +> ï¼ +> +> ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +> +> ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +> +> ï¼ migration/qemu-file-channel.c:78 +> +> ï¼ +> +> ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +> +> ï¼ migration/qemu-file.c:295 +> +> ï¼ +> +> ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +> +> ï¼ address@hidden) at migration/qemu-file.c:555 +> +> ï¼ +> +> ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +> +> ï¼ migration/qemu-file.c:568 +> +> ï¼ +> +> ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +> +> ï¼ migration/qemu-file.c:648 +> +> ï¼ +> +> ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +> +> ï¼ address@hidden) at migration/colo.c:244 +> +> ï¼ +> +> ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +> +> ï¼ outï¼, address@hidden, +> +> ï¼ address@hidden) +> +> ï¼ +> +> ï¼ at migration/colo.c:264 +> +> ï¼ +> +> ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +> +> ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +> +> ï¼ +> +> ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +> +> ï¼ +> +> ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +> +> ï¼ +> +> ï¼ (gdb) p ioc-ï¼name +> +> ï¼ +> +> ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +> +> ï¼ +> +> ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +> +> ï¼ +> +> ï¼ $3 = 0 +> +> ï¼ +> +> ï¼ +> +> ï¼ (gdb) bt +> +> ï¼ +> +> ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +> +> ï¼ 
condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +> +> ï¼ +> +> ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +> +> ï¼ gmain.c:3054 +> +> ï¼ +> +> ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +> +> ï¼ address@hidden) at gmain.c:3630 +> +> ï¼ +> +> ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +> +> ï¼ +> +> ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +> +> ï¼ util/main-loop.c:258 +> +> ï¼ +> +> ï¼ #5 main_loop_wait (address@hidden) at +> +> ï¼ util/main-loop.c:506 +> +> ï¼ +> +> ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +> +> ï¼ +> +> ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +> +> ï¼ outï¼) at vl.c:4709 +> +> ï¼ +> +> ï¼ (gdb) p ioc-ï¼features +> +> ï¼ +> +> ï¼ $1 = 6 +> +> ï¼ +> +> ï¼ (gdb) p ioc-ï¼name +> +> ï¼ +> +> ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +> +> ï¼ +> +> ï¼ +> +> ï¼ May be socket_accept_incoming_migration should +> +> ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +> +> ï¼ +> +> ï¼ +> +> ï¼ thank you. +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ åå§é®ä»¶ +> +> ï¼ address@hidden +> +> ï¼ address@hidden +> +> ï¼ address@hidden@huawei.comï¼ +> +> ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +> +> ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +> +> ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +> +> ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +> +> ï¼ ï¼ +> +> ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +> +> ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +> +> ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +> +> ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +> +> ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +> +> ï¼ ï¼ +> +> ï¼ ï¼ I found that the colo in qemu is not complete yet. 
+> +> ï¼ ï¼ Do the colo have any plan for development? +> +> ï¼ +> +> ï¼ Yes, We are developing. You can see some of patch we pushing. +> +> ï¼ +> +> ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +> +> ï¼ +> +> ï¼ In our internal version can run it successfully, +> +> ï¼ The failover detail you can ask Zhanghailiang for help. +> +> ï¼ Next time if you have some question about COLO, +> +> ï¼ please cc me and zhanghailiang address@hidden +> +> ï¼ +> +> ï¼ +> +> ï¼ Thanks +> +> ï¼ Zhang Chen +> +> ï¼ +> +> ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ centos7.2+qemu2.7.50 +> +> ï¼ ï¼ (gdb) bt +> +> ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> +> ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +> +> ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) +> +> at +> +> ï¼ ï¼ io/channel-socket.c:497 +> +> ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> +> ï¼ ï¼ address@hidden "", address@hidden, +> +> ï¼ ï¼ address@hidden) at io/channel.c:97 +> +> ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +> +> ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +> +> ï¼ ï¼ migration/qemu-file-channel.c:78 +> +> ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> +> ï¼ ï¼ migration/qemu-file.c:257 +> +> ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> +> ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +> +> ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> +> ï¼ ï¼ migration/qemu-file.c:523 +> +> ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> +> ï¼ ï¼ migration/qemu-file.c:603 +> +> ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> +> ï¼ ï¼ address@hidden) at migration/colo.c:215 +> +> ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +> +> ï¼ ï¼ checkpoint_request=ï¼synthetic 
pointerï¼, f=ï¼optimized outï¼) at +> +> ï¼ ï¼ migration/colo.c:546 +> +> ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> +> ï¼ ï¼ migration/colo.c:649 +> +> ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +> +> ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ -- +> +> ï¼ ï¼ View this message in context: +> +> +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> +> ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ ï¼ +> +> ï¼ +> +> ï¼ -- +> +> ï¼ Thanks +> +> ï¼ Zhang Chen +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> ï¼ +> +> +> +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +* Hailiang Zhang (address@hidden) wrote: +Hi, + +Thanks for reporting this, and i confirmed it in my test, and it is a bug. + +Though we tried to call qemu_file_shutdown() to shutdown the related fd, in +case COLO thread/incoming thread is stuck in read/write() while do failover, +but it didn't take effect, because all the fd used by COLO (also migration) +has been wrapped by qio channel, and it will not call the shutdown API if +we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN). + +Cc: Dr. David Alan Gilbert <address@hidden> + +I doubted migration cancel has the same problem, it may be stuck in write() +if we tried to cancel migration. + +void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error +**errp) +{ + qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); + migration_channel_connect(s, ioc, NULL); + ... ... +We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +QIO_CHANNEL_FEATURE_SHUTDOWN) above, +and the +migrate_fd_cancel() +{ + ... ... + if (s->state == MIGRATION_STATUS_CANCELLING && f) { + qemu_file_shutdown(f); --> This will not take effect. 
No ? + } +} +(cc'd in Daniel Berrange). +I see that we call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +at the +top of qio_channel_socket_new; so I think that's safe isn't it? +Hmm, you are right, this problem is only exist for the migration incoming fd, +thanks. +Dave +Thanks, +Hailiang + +On 2017/3/21 16:10, address@hidden wrote: +Thank you。 + +I have test aready。 + +When the Primary Node panic,the Secondary Node qemu hang at the same place。 + +Incorrding +http://wiki.qemu-project.org/Features/COLO +,kill Primary Node qemu +will not produce the problem,but Primary Node panic can。 + +I think due to the feature of channel does not support +QIO_CHANNEL_FEATURE_SHUTDOWN. + + +when failover,channel_shutdown could not shut down the channel. + + +so the colo_process_incoming_thread will hang at recvmsg. + + +I test a patch: + + +diff --git a/migration/socket.c b/migration/socket.c + + +index 13966f1..d65a0ea 100644 + + +--- a/migration/socket.c + + ++++ b/migration/socket.c + + +@@ -147,8 +147,9 @@ static gboolean socket_accept_incoming_migration(QIOChannel +*ioc, + + + } + + + + + + trace_migration_socket_incoming_accepted() + + + + + + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming") + + ++ qio_channel_set_feature(QIO_CHANNEL(sioc), QIO_CHANNEL_FEATURE_SHUTDOWN) + + + migration_channel_process_incoming(migrate_get_current(), + + + QIO_CHANNEL(sioc)) + + + object_unref(OBJECT(sioc)) + + + + +My test will not hang any more. + + + + + + + + + + + + + + + + + +原始邮件 + + + +发件人: address@hidden +收件人:王广10165992 address@hidden +抄送人: address@hidden address@hidden +日 期 :2017年03月21日 15:58 +主 题 :Re: [Qemu-devel] 答复: Re: [BUG]COLO failover hang + + + + + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. 
+http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +ï¼ +ï¼ hi. +ï¼ +ï¼ I test the git qemu master have the same problem. +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +ï¼ +ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +ï¼ (address@hidden, address@hidden "", +ï¼ address@hidden, address@hidden) at io/channel.c:114 +ï¼ +ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ migration/qemu-file-channel.c:78 +ï¼ +ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +ï¼ migration/qemu-file.c:295 +ï¼ +ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +ï¼ address@hidden) at migration/qemu-file.c:555 +ï¼ +ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +ï¼ migration/qemu-file.c:568 +ï¼ +ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +ï¼ migration/qemu-file.c:648 +ï¼ +ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +ï¼ address@hidden) at migration/colo.c:244 +ï¼ +ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +ï¼ outï¼, address@hidden, +ï¼ address@hidden) +ï¼ +ï¼ at migration/colo.c:264 +ï¼ +ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +ï¼ +ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ +ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +ï¼ +ï¼ (gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN +ï¼ +ï¼ $3 = 0 +ï¼ +ï¼ +ï¼ (gdb) bt +ï¼ +ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +ï¼ +ï¼ #1 0x00007fdcc6966350 in 
g_main_dispatch (context=ï¼optimized outï¼) at +ï¼ gmain.c:3054 +ï¼ +ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +ï¼ address@hidden) at gmain.c:3630 +ï¼ +ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +ï¼ +ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +ï¼ util/main-loop.c:258 +ï¼ +ï¼ #5 main_loop_wait (address@hidden) at +ï¼ util/main-loop.c:506 +ï¼ +ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +ï¼ +ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +ï¼ outï¼) at vl.c:4709 +ï¼ +ï¼ (gdb) p ioc-ï¼features +ï¼ +ï¼ $1 = 6 +ï¼ +ï¼ (gdb) p ioc-ï¼name +ï¼ +ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +ï¼ +ï¼ +ï¼ May be socket_accept_incoming_migration should +ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +ï¼ +ï¼ +ï¼ thank you. +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ åå§é®ä»¶ +ï¼ address@hidden +ï¼ address@hidden +ï¼ address@hidden@huawei.comï¼ +ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ ï¼ +ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ ï¼ +ï¼ ï¼ I found that the colo in qemu is not complete yet. +ï¼ ï¼ Do the colo have any plan for development? +ï¼ +ï¼ Yes, We are developing. You can see some of patch we pushing. +ï¼ +ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +ï¼ +ï¼ In our internal version can run it successfully, +ï¼ The failover detail you can ask Zhanghailiang for help. 
+ï¼ Next time if you have some question about COLO, +ï¼ please cc me and zhanghailiang address@hidden +ï¼ +ï¼ +ï¼ Thanks +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ centos7.2+qemu2.7.50 +ï¼ ï¼ (gdb) bt +ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ ï¼ io/channel-socket.c:497 +ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ ï¼ address@hidden "", address@hidden, +ï¼ ï¼ address@hidden) at io/channel.c:97 +ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ ï¼ migration/qemu-file-channel.c:78 +ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ ï¼ migration/qemu-file.c:257 +ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:523 +ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ ï¼ migration/qemu-file.c:603 +ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ ï¼ address@hidden) at migration/colo.c:215 +ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ ï¼ migration/colo.c:546 +ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ ï¼ migration/colo.c:649 +ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ -- +ï¼ ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. 
+ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ ï¼ +ï¼ +ï¼ -- +ï¼ Thanks +ï¼ Zhang Chen +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +. + +* Hailiang Zhang (address@hidden) wrote: +> +On 2017/3/21 19:56, Dr. David Alan Gilbert wrote: +> +> * Hailiang Zhang (address@hidden) wrote: +> +> > Hi, +> +> > +> +> > Thanks for reporting this, and i confirmed it in my test, and it is a bug. +> +> > +> +> > Though we tried to call qemu_file_shutdown() to shutdown the related fd, +> +> > in +> +> > case COLO thread/incoming thread is stuck in read/write() while do +> +> > failover, +> +> > but it didn't take effect, because all the fd used by COLO (also +> +> > migration) +> +> > has been wrapped by qio channel, and it will not call the shutdown API if +> +> > we didn't qio_channel_set_feature(QIO_CHANNEL(sioc), +> +> > QIO_CHANNEL_FEATURE_SHUTDOWN). +> +> > +> +> > Cc: Dr. David Alan Gilbert <address@hidden> +> +> > +> +> > I doubted migration cancel has the same problem, it may be stuck in +> +> > write() +> +> > if we tried to cancel migration. +> +> > +> +> > void fd_start_outgoing_migration(MigrationState *s, const char *fdname, +> +> > Error **errp) +> +> > { +> +> > qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); +> +> > migration_channel_connect(s, ioc, NULL); +> +> > ... ... +> +> > We didn't call qio_channel_set_feature(QIO_CHANNEL(sioc), +> +> > QIO_CHANNEL_FEATURE_SHUTDOWN) above, +> +> > and the +> +> > migrate_fd_cancel() +> +> > { +> +> > ... ... +> +> > if (s->state == MIGRATION_STATUS_CANCELLING && f) { +> +> > qemu_file_shutdown(f); --> This will not take effect. No ? +> +> > } +> +> > } +> +> +> +> (cc'd in Daniel Berrange). +> +> I see that we call qio_channel_set_feature(ioc, +> +> QIO_CHANNEL_FEATURE_SHUTDOWN); at the +> +> top of qio_channel_socket_new; so I think that's safe isn't it? +> +> +> +> +Hmm, you are right, this problem is only exist for the migration incoming fd, +> +thanks. 
+Yes, and I don't think we normally do a cancel on the incoming side of a +migration. + +Dave + +> +> Dave +> +> +> +> > Thanks, +> +> > Hailiang +> +> > +> +> > On 2017/3/21 16:10, address@hidden wrote: +> +> > > Thank you。 +> +> > > +> +> > > I have test aready。 +> +> > > +> +> > > When the Primary Node panic,the Secondary Node qemu hang at the same +> +> > > place。 +> +> > > +> +> > > Incorrding +http://wiki.qemu-project.org/Features/COLO +,kill Primary +> +> > > Node qemu will not produce the problem,but Primary Node panic can。 +> +> > > +> +> > > I think due to the feature of channel does not support +> +> > > QIO_CHANNEL_FEATURE_SHUTDOWN. +> +> > > +> +> > > +> +> > > when failover,channel_shutdown could not shut down the channel. +> +> > > +> +> > > +> +> > > so the colo_process_incoming_thread will hang at recvmsg. +> +> > > +> +> > > +> +> > > I test a patch: +> +> > > +> +> > > +> +> > > diff --git a/migration/socket.c b/migration/socket.c +> +> > > +> +> > > +> +> > > index 13966f1..d65a0ea 100644 +> +> > > +> +> > > +> +> > > --- a/migration/socket.c +> +> > > +> +> > > +> +> > > +++ b/migration/socket.c +> +> > > +> +> > > +> +> > > @@ -147,8 +147,9 @@ static gboolean +> +> > > socket_accept_incoming_migration(QIOChannel *ioc, +> +> > > +> +> > > +> +> > > } +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > trace_migration_socket_incoming_accepted() +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > qio_channel_set_name(QIO_CHANNEL(sioc), +> +> > > "migration-socket-incoming") +> +> > > +> +> > > +> +> > > + qio_channel_set_feature(QIO_CHANNEL(sioc), +> +> > > QIO_CHANNEL_FEATURE_SHUTDOWN) +> +> > > +> +> > > +> +> > > migration_channel_process_incoming(migrate_get_current(), +> +> > > +> +> > > +> +> > > QIO_CHANNEL(sioc)) +> +> > > +> +> > > +> +> > > object_unref(OBJECT(sioc)) +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > My test will not hang any more. 
+> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > åå§é®ä»¶ +> +> > > +> +> > > +> +> > > +> +> > > åä»¶äººï¼ address@hidden +> +> > > æ¶ä»¶äººï¼ç广10165992 address@hidden +> +> > > æéäººï¼ address@hidden address@hidden +> +> > > æ¥ æ ï¼2017å¹´03æ21æ¥ 15:58 +> +> > > 主 é¢ ï¼Re: [Qemu-devel] çå¤: Re: [BUG]COLO failover hang +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > +> +> > > Hi,Wang. +> +> > > +> +> > > You can test this branch: +> +> > > +> +> > > +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +> +> > > +> +> > > and please follow wiki ensure your own configuration correctly. +> +> > > +> +> > > +http://wiki.qemu-project.org/Features/COLO +> +> > > +> +> > > +> +> > > Thanks +> +> > > +> +> > > Zhang Chen +> +> > > +> +> > > +> +> > > On 03/21/2017 03:27 PM, address@hidden wrote: +> +> > > ï¼ +> +> > > ï¼ hi. +> +> > > ï¼ +> +> > > ï¼ I test the git qemu master have the same problem. 
+> +> > > ï¼ +> +> > > ï¼ (gdb) bt +> +> > > ï¼ +> +> > > ï¼ #0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +> +> > > ï¼ niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +> +> > > ï¼ +> +> > > ï¼ #1 0x00007f658e4aa0c2 in qio_channel_read +> +> > > ï¼ (address@hidden, address@hidden "", +> +> > > ï¼ address@hidden, address@hidden) at io/channel.c:114 +> +> > > ï¼ +> +> > > ï¼ #2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +> +> > > ï¼ buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +> +> > > ï¼ migration/qemu-file-channel.c:78 +> +> > > ï¼ +> +> > > ï¼ #3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +> +> > > ï¼ migration/qemu-file.c:295 +> +> > > ï¼ +> +> > > ï¼ #4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +> +> > > ï¼ address@hidden) at migration/qemu-file.c:555 +> +> > > ï¼ +> +> > > ï¼ #5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +> +> > > ï¼ migration/qemu-file.c:568 +> +> > > ï¼ +> +> > > ï¼ #6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +> +> > > ï¼ migration/qemu-file.c:648 +> +> > > ï¼ +> +> > > ï¼ #7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +> +> > > ï¼ address@hidden) at migration/colo.c:244 +> +> > > ï¼ +> +> > > ï¼ #8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +> +> > > ï¼ outï¼, address@hidden, +> +> > > ï¼ address@hidden) +> +> > > ï¼ +> +> > > ï¼ at migration/colo.c:264 +> +> > > ï¼ +> +> > > ï¼ #9 0x00007f658e3e740e in colo_process_incoming_thread +> +> > > ï¼ (opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +> +> > > ï¼ +> +> > > ï¼ #10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 +> +> > > ï¼ +> +> > > ï¼ #11 0x00007f65881983ed in clone () from /lib64/libc.so.6 +> +> > > ï¼ +> +> > > ï¼ (gdb) p ioc-ï¼name +> +> > > ï¼ +> +> > > ï¼ $2 = 0x7f658ff7d5c0 "migration-socket-incoming" +> +> > > ï¼ +> +> > > ï¼ (gdb) p ioc-ï¼features Do not 
support +> +> > > QIO_CHANNEL_FEATURE_SHUTDOWN +> +> > > ï¼ +> +> > > ï¼ $3 = 0 +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ (gdb) bt +> +> > > ï¼ +> +> > > ï¼ #0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +> +> > > ï¼ condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +> +> > > ï¼ +> +> > > ï¼ #1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +> +> > > ï¼ gmain.c:3054 +> +> > > ï¼ +> +> > > ï¼ #2 g_main_context_dispatch (context=ï¼optimized outï¼, +> +> > > ï¼ address@hidden) at gmain.c:3630 +> +> > > ï¼ +> +> > > ï¼ #3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +> +> > > ï¼ +> +> > > ï¼ #4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +> +> > > ï¼ util/main-loop.c:258 +> +> > > ï¼ +> +> > > ï¼ #5 main_loop_wait (address@hidden) at +> +> > > ï¼ util/main-loop.c:506 +> +> > > ï¼ +> +> > > ï¼ #6 0x00007fdccb526187 in main_loop () at vl.c:1898 +> +> > > ï¼ +> +> > > ï¼ #7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +> +> > > ï¼ outï¼) at vl.c:4709 +> +> > > ï¼ +> +> > > ï¼ (gdb) p ioc-ï¼features +> +> > > ï¼ +> +> > > ï¼ $1 = 6 +> +> > > ï¼ +> +> > > ï¼ (gdb) p ioc-ï¼name +> +> > > ï¼ +> +> > > ï¼ $2 = 0x7fdcce1b1ab0 "migration-socket-listener" +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ May be socket_accept_incoming_migration should +> +> > > ï¼ call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ thank you. 
+> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ åå§é®ä»¶ +> +> > > ï¼ address@hidden +> +> > > ï¼ address@hidden +> +> > > ï¼ address@hidden@huawei.comï¼ +> +> > > ï¼ *æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +> +> > > ï¼ *主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ On 03/15/2017 05:06 PM, wangguang wrote: +> +> > > ï¼ ï¼ am testing QEMU COLO feature described here [QEMU +> +> > > ï¼ ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ When the Primary Node panic,the Secondary Node qemu hang. +> +> > > ï¼ ï¼ hang at recvmsg in qio_channel_socket_readv. +> +> > > ï¼ ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +> +> > > ï¼ ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +> +> > > ï¼ ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ I found that the colo in qemu is not complete yet. +> +> > > ï¼ ï¼ Do the colo have any plan for development? +> +> > > ï¼ +> +> > > ï¼ Yes, We are developing. You can see some of patch we pushing. +> +> > > ï¼ +> +> > > ï¼ ï¼ Has anyone ever run it successfully? Any help is appreciated! +> +> > > ï¼ +> +> > > ï¼ In our internal version can run it successfully, +> +> > > ï¼ The failover detail you can ask Zhanghailiang for help. 
+> +> > > ï¼ Next time if you have some question about COLO, +> +> > > ï¼ please cc me and zhanghailiang address@hidden +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ Thanks +> +> > > ï¼ Zhang Chen +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ centos7.2+qemu2.7.50 +> +> > > ï¼ ï¼ (gdb) bt +> +> > > ï¼ ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +> +> > > ï¼ ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized +> +> > > outï¼, +> +> > > ï¼ ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, +> +> > > errp=0x0) at +> +> > > ï¼ ï¼ io/channel-socket.c:497 +> +> > > ï¼ ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +> +> > > ï¼ ï¼ address@hidden "", address@hidden, +> +> > > ï¼ ï¼ address@hidden) at io/channel.c:97 +> +> > > ï¼ ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized +> +> > > outï¼, +> +> > > ï¼ ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +> +> > > ï¼ ï¼ migration/qemu-file-channel.c:78 +> +> > > ï¼ ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +> +> > > ï¼ ï¼ migration/qemu-file.c:257 +> +> > > ï¼ ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +> +> > > ï¼ ï¼ address@hidden) at migration/qemu-file.c:510 +> +> > > ï¼ ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +> +> > > ï¼ ï¼ migration/qemu-file.c:523 +> +> > > ï¼ ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +> +> > > ï¼ ï¼ migration/qemu-file.c:603 +> +> > > ï¼ ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +> +> > > ï¼ ï¼ address@hidden) at migration/colo.c:215 +> +> > > ï¼ ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message +> +> > > (errp=0x7f3d62bfaa48, +> +> > > ï¼ ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +> +> > > ï¼ ï¼ migration/colo.c:546 +> +> > > ï¼ ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +> +> > > ï¼ ï¼ 
migration/colo.c:649 +> +> > > ï¼ ï¼ #11 0x00007f3e00cc1df3 in start_thread () from +> +> > > /lib64/libpthread.so.0 +> +> > > ï¼ ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ -- +> +> > > ï¼ ï¼ View this message in context: +> +> > > +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +> +> > > ï¼ ï¼ Sent from the Developer mailing list archive at Nabble.com. +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ ï¼ +> +> > > ï¼ +> +> > > ï¼ -- +> +> > > ï¼ Thanks +> +> > > ï¼ Zhang Chen +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > ï¼ +> +> > > +> +> > +> +> -- +> +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +> +> . +> +> +> +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + |