diff options
Diffstat (limited to 'results/classifier/016/debug')
25 files changed, 16586 insertions, 0 deletions
diff --git a/results/classifier/016/debug/02572177 b/results/classifier/016/debug/02572177 new file mode 100644 index 00000000..44bd5648 --- /dev/null +++ b/results/classifier/016/debug/02572177 @@ -0,0 +1,448 @@ +debug: 0.942 +files: 0.236 +x86: 0.202 +assembly: 0.118 +virtual: 0.110 +hypervisor: 0.106 +operating system: 0.081 +performance: 0.061 +PID: 0.058 +i386: 0.040 +register: 0.039 +TCG: 0.035 +network: 0.025 +kernel: 0.017 +boot: 0.016 +socket: 0.016 +device: 0.011 +arm: 0.008 +architecture: 0.007 +ppc: 0.007 +semantic: 0.006 +user-level: 0.006 +mistranslation: 0.003 +alpha: 0.003 +permissions: 0.003 +VMM: 0.003 +peripherals: 0.002 +graphic: 0.002 +risc-v: 0.002 +KVM: 0.002 +vnc: 0.001 + +[Qemu-devel] 答复: Re: [BUG]COLO failover hang + +hi. + + +I test the git qemu master have the same problem. + + +(gdb) bt + + +#0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, niov=1, +fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 + + +#1 0x00007f658e4aa0c2 in qio_channel_read (address@hidden, address@hidden "", +address@hidden, address@hidden) at io/channel.c:114 + + +#2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +migration/qemu-file-channel.c:78 + + +#3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +migration/qemu-file.c:295 + + +#4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, address@hidden) at +migration/qemu-file.c:555 + + +#5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +migration/qemu-file.c:568 + + +#6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +migration/qemu-file.c:648 + + +#7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +address@hidden) at migration/colo.c:244 + + +#8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized outï¼, +address@hidden, address@hidden) + + + at migration/colo.c:264 + + +#9 0x00007f658e3e740e in colo_process_incoming_thread (opaque=0x7f658eb30360 
+ï¼mis_current.31286ï¼) at migration/colo.c:577 + + +#10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 + + +#11 0x00007f65881983ed in clone () from /lib64/libc.so.6 + + +(gdb) p ioc-ï¼name + + +$2 = 0x7f658ff7d5c0 "migration-socket-incoming" + + +(gdb) p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN + + +$3 = 0 + + + + + +(gdb) bt + + +#0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, condition=G_IO_IN, +opaque=0x7fdcceeafa90) at migration/socket.c:137 + + +#1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +gmain.c:3054 + + +#2 g_main_context_dispatch (context=ï¼optimized outï¼, address@hidden) at +gmain.c:3630 + + +#3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 + + +#4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at util/main-loop.c:258 + + +#5 main_loop_wait (address@hidden) at util/main-loop.c:506 + + +#6 0x00007fdccb526187 in main_loop () at vl.c:1898 + + +#7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized outï¼) at +vl.c:4709 + + +(gdb) p ioc-ï¼features + + +$1 = 6 + + +(gdb) p ioc-ï¼name + + +$2 = 0x7fdcce1b1ab0 "migration-socket-listener" + + + + + +May be socket_accept_incoming_migration should call +qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? + + + + + +thank you. + + + + + + + + + + + + + + + +åå§é®ä»¶ + + + +åä»¶äººï¼ address@hidden +æ¶ä»¶äººï¼ç广10165992 address@hidden +æéäººï¼ address@hidden address@hidden +æ¥ æ ï¼2017å¹´03æ16æ¥ 14:46 +主 é¢ ï¼Re: [Qemu-devel] COLO failover hang + + + + + + + +On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ +ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ monitor,the Secondary Node qemu still hang at recvmsg . 
+ï¼ +ï¼ I found that the colo in qemu is not complete yet. +ï¼ Do the colo have any plan for development? + +Yes, We are developing. You can see some of patch we pushing. + +ï¼ Has anyone ever run it successfully? Any help is appreciated! + +In our internal version can run it successfully, +The failover detail you can ask Zhanghailiang for help. +Next time if you have some question about COLO, +please cc me and zhanghailiang address@hidden + + +Thanks +Zhang Chen + + +ï¼ +ï¼ +ï¼ +ï¼ centos7.2+qemu2.7.50 +ï¼ (gdb) bt +ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ io/channel-socket.c:497 +ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ address@hidden "", address@hidden, +ï¼ address@hidden) at io/channel.c:97 +ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ migration/qemu-file-channel.c:78 +ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ migration/qemu-file.c:257 +ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ migration/qemu-file.c:523 +ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ migration/qemu-file.c:603 +ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ address@hidden) at migration/colo.c:215 +ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ migration/colo.c:546 +ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ migration/colo.c:649 +ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 
+ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ -- +ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ Sent from the Developer mailing list archive at Nabble.com. +ï¼ +ï¼ +ï¼ +ï¼ + +-- +Thanks +Zhang Chen + +Hi,Wang. + +You can test this branch: +https://github.com/coloft/qemu/tree/colo-v5.1-developing-COLO-frame-v21-with-shared-disk +and please follow wiki ensure your own configuration correctly. +http://wiki.qemu-project.org/Features/COLO +Thanks + +Zhang Chen + + +On 03/21/2017 03:27 PM, address@hidden wrote: +hi. + +I test the git qemu master have the same problem. + +(gdb) bt +#0 qio_channel_socket_readv (ioc=0x7f65911b4e50, iov=0x7f64ef3fd880, +niov=1, fds=0x0, nfds=0x0, errp=0x0) at io/channel-socket.c:461 +#1 0x00007f658e4aa0c2 in qio_channel_read +(address@hidden, address@hidden "", +address@hidden, address@hidden) at io/channel.c:114 +#2 0x00007f658e3ea990 in channel_get_buffer (opaque=ï¼optimized outï¼, +buf=0x7f65907cb838 "", pos=ï¼optimized outï¼, size=32768) at +migration/qemu-file-channel.c:78 +#3 0x00007f658e3e97fc in qemu_fill_buffer (f=0x7f65907cb800) at +migration/qemu-file.c:295 +#4 0x00007f658e3ea2e1 in qemu_peek_byte (address@hidden, +address@hidden) at migration/qemu-file.c:555 +#5 0x00007f658e3ea34b in qemu_get_byte (address@hidden) at +migration/qemu-file.c:568 +#6 0x00007f658e3ea552 in qemu_get_be32 (address@hidden) at +migration/qemu-file.c:648 +#7 0x00007f658e3e66e5 in colo_receive_message (f=0x7f65907cb800, +address@hidden) at migration/colo.c:244 +#8 0x00007f658e3e681e in colo_receive_check_message (f=ï¼optimized +outï¼, address@hidden, +address@hidden) +at migration/colo.c:264 +#9 0x00007f658e3e740e in colo_process_incoming_thread +(opaque=0x7f658eb30360 ï¼mis_current.31286ï¼) at migration/colo.c:577 +#10 0x00007f658be09df3 in start_thread () from /lib64/libpthread.so.0 + +#11 0x00007f65881983ed in clone () from /lib64/libc.so.6 + +(gdb) p ioc-ï¼name + +$2 = 0x7f658ff7d5c0 "migration-socket-incoming" + +(gdb) 
p ioc-ï¼features Do not support QIO_CHANNEL_FEATURE_SHUTDOWN + +$3 = 0 + + +(gdb) bt +#0 socket_accept_incoming_migration (ioc=0x7fdcceeafa90, +condition=G_IO_IN, opaque=0x7fdcceeafa90) at migration/socket.c:137 +#1 0x00007fdcc6966350 in g_main_dispatch (context=ï¼optimized outï¼) at +gmain.c:3054 +#2 g_main_context_dispatch (context=ï¼optimized outï¼, +address@hidden) at gmain.c:3630 +#3 0x00007fdccb8a6dcc in glib_pollfds_poll () at util/main-loop.c:213 +#4 os_host_main_loop_wait (timeout=ï¼optimized outï¼) at +util/main-loop.c:258 +#5 main_loop_wait (address@hidden) at +util/main-loop.c:506 +#6 0x00007fdccb526187 in main_loop () at vl.c:1898 +#7 main (argc=ï¼optimized outï¼, argv=ï¼optimized outï¼, envp=ï¼optimized +outï¼) at vl.c:4709 +(gdb) p ioc-ï¼features + +$1 = 6 + +(gdb) p ioc-ï¼name + +$2 = 0x7fdcce1b1ab0 "migration-socket-listener" +May be socket_accept_incoming_migration should +call qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN)?? +thank you. + + + + + +åå§é®ä»¶ +address@hidden; +*æ¶ä»¶äººï¼*ç广10165992;address@hidden; +address@hidden;address@hidden; +*æ¥ æ ï¼*2017å¹´03æ16æ¥ 14:46 +*主 é¢ ï¼**Re: [Qemu-devel] COLO failover hang* + + + + +On 03/15/2017 05:06 PM, wangguang wrote: +ï¼ am testing QEMU COLO feature described here [QEMU +ï¼ Wiki]( +http://wiki.qemu-project.org/Features/COLO +). +ï¼ +ï¼ When the Primary Node panic,the Secondary Node qemu hang. +ï¼ hang at recvmsg in qio_channel_socket_readv. +ï¼ And I run { 'execute': 'nbd-server-stop' } and { "execute": +ï¼ "x-colo-lost-heartbeat" } in Secondary VM's +ï¼ monitor,the Secondary Node qemu still hang at recvmsg . +ï¼ +ï¼ I found that the colo in qemu is not complete yet. +ï¼ Do the colo have any plan for development? + +Yes, We are developing. You can see some of patch we pushing. + +ï¼ Has anyone ever run it successfully? Any help is appreciated! + +In our internal version can run it successfully, +The failover detail you can ask Zhanghailiang for help. 
+Next time if you have some question about COLO, +please cc me and zhanghailiang address@hidden + + +Thanks +Zhang Chen + + +ï¼ +ï¼ +ï¼ +ï¼ centos7.2+qemu2.7.50 +ï¼ (gdb) bt +ï¼ #0 0x00007f3e00cc86ad in recvmsg () from /lib64/libpthread.so.0 +ï¼ #1 0x00007f3e0332b738 in qio_channel_socket_readv (ioc=ï¼optimized outï¼, +ï¼ iov=ï¼optimized outï¼, niov=ï¼optimized outï¼, fds=0x0, nfds=0x0, errp=0x0) at +ï¼ io/channel-socket.c:497 +ï¼ #2 0x00007f3e03329472 in qio_channel_read (address@hidden, +ï¼ address@hidden "", address@hidden, +ï¼ address@hidden) at io/channel.c:97 +ï¼ #3 0x00007f3e032750e0 in channel_get_buffer (opaque=ï¼optimized outï¼, +ï¼ buf=0x7f3e05910f38 "", pos=ï¼optimized outï¼, size=32768) at +ï¼ migration/qemu-file-channel.c:78 +ï¼ #4 0x00007f3e0327412c in qemu_fill_buffer (f=0x7f3e05910f00) at +ï¼ migration/qemu-file.c:257 +ï¼ #5 0x00007f3e03274a41 in qemu_peek_byte (address@hidden, +ï¼ address@hidden) at migration/qemu-file.c:510 +ï¼ #6 0x00007f3e03274aab in qemu_get_byte (address@hidden) at +ï¼ migration/qemu-file.c:523 +ï¼ #7 0x00007f3e03274cb2 in qemu_get_be32 (address@hidden) at +ï¼ migration/qemu-file.c:603 +ï¼ #8 0x00007f3e03271735 in colo_receive_message (f=0x7f3e05910f00, +ï¼ address@hidden) at migration/colo.c:215 +ï¼ #9 0x00007f3e0327250d in colo_wait_handle_message (errp=0x7f3d62bfaa48, +ï¼ checkpoint_request=ï¼synthetic pointerï¼, f=ï¼optimized outï¼) at +ï¼ migration/colo.c:546 +ï¼ #10 colo_process_incoming_thread (opaque=0x7f3e067245e0) at +ï¼ migration/colo.c:649 +ï¼ #11 0x00007f3e00cc1df3 in start_thread () from /lib64/libpthread.so.0 +ï¼ #12 0x00007f3dfc9c03ed in clone () from /lib64/libc.so.6 +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ +ï¼ -- +ï¼ View this message in context: +http://qemu.11.n7.nabble.com/COLO-failover-hang-tp473250.html +ï¼ Sent from the Developer mailing list archive at Nabble.com. 
+ï¼ +ï¼ +ï¼ +ï¼ + +-- +Thanks +Zhang Chen +-- +Thanks +Zhang Chen + diff --git a/results/classifier/016/debug/05479587 b/results/classifier/016/debug/05479587 new file mode 100644 index 00000000..afdbd656 --- /dev/null +++ b/results/classifier/016/debug/05479587 @@ -0,0 +1,110 @@ +debug: 0.967 +network: 0.931 +virtual: 0.852 +operating system: 0.716 +hypervisor: 0.107 +register: 0.102 +device: 0.063 +files: 0.056 +permissions: 0.051 +x86: 0.050 +semantic: 0.049 +socket: 0.032 +boot: 0.031 +TCG: 0.027 +PID: 0.027 +user-level: 0.025 +i386: 0.022 +risc-v: 0.017 +VMM: 0.014 +performance: 0.010 +vnc: 0.007 +ppc: 0.007 +peripherals: 0.006 +kernel: 0.005 +KVM: 0.005 +assembly: 0.004 +alpha: 0.003 +arm: 0.003 +graphic: 0.003 +architecture: 0.002 +mistranslation: 0.002 + +[Qemu-devel] [BUG] network qga : windows os lost ip address of the network card in some cases + +We think this problem coulde be solevd in qga modulesãcan anybody give some +advice ? + + +[BUG] network : windows os lost ip address of the network card in some cases + +we found this problem for a long time ãFor example, if we has three network +card in virtual xml file ï¼such as "network connection 1" / "network connection +2"/"network connection 3" ã + +Echo network card has own ip address ï¼such as 192.168.1.1 / 2.1 /3.1 , when +delete the first card ï¼reboot the windows virtual os, then this problem +happened ! + + + + +we found that the sencond network card will replace the first one , then the +ip address of "network connection 2 " become 192.168.1.1 ã + + +Our third party users began to complain about this bug ãAll the business of the +second ip lost !!! + +I mean both of windows and linux has this bug , we solve this bug in linux +throught bonding netcrad pci and mac address ã + +There is no good solution on windows os . thera are ? we implemented a plan to +resumption of IP by QGA. Is there a better way ? 
+ + + + + + + + +åå§é®ä»¶ + + + +å件人ï¼å°¹ä½ä¸º10144574 +æ¶ä»¶äººï¼ address@hidden +æ¥ æ ï¼2017å¹´04æ14æ¥ 16:46 +主 é¢ ï¼[BUG] network : windows os lost ip address of the network card in some +cases + + + + + + +we found this problem for a long time ãFor example, if we has three network +card in virtual xml file ï¼such as "network connection 1" / "network connection +2"/"network connection 3" ã + +Echo network card has own ip address ï¼such as 192.168.1.1 / 2.1 /3.1 , when +delete the first card ï¼reboot the windows virtual os, then this problem +happened ! + + + + +we found that the sencond network card will replace the first one , then the +ip address of "network connection 2 " become 192.168.1.1 ã + + +Our third party users began to complain about this bug ãAll the business of the +second ip lost !!! + +I mean both of windows and linux has this bug , we solve this bug in linux +throught bonding netcrad pci and mac address ã + +There is no good solution on windows os . thera are ? we implemented a plan to +resumption of IP by QGA. Is there a better way ? + diff --git a/results/classifier/016/debug/11357571 b/results/classifier/016/debug/11357571 new file mode 100644 index 00000000..80cf79d8 --- /dev/null +++ b/results/classifier/016/debug/11357571 @@ -0,0 +1,74 @@ +debug: 0.960 +vnc: 0.815 +network: 0.605 +virtual: 0.569 +graphic: 0.186 +x86: 0.108 +TCG: 0.095 +boot: 0.049 +operating system: 0.045 +performance: 0.042 +register: 0.040 +hypervisor: 0.030 +i386: 0.023 +files: 0.022 +ppc: 0.021 +PID: 0.011 +VMM: 0.011 +risc-v: 0.011 +alpha: 0.010 +user-level: 0.010 +device: 0.007 +assembly: 0.005 +socket: 0.004 +kernel: 0.003 +semantic: 0.003 +KVM: 0.003 +permissions: 0.002 +arm: 0.002 +architecture: 0.001 +peripherals: 0.001 +mistranslation: 0.000 + +[Qemu-devel] [BUG] VNC: client won't send FramebufferUpdateRequest if job in flight is aborted + +Hi Gerd, Daniel. 
+ +We noticed that if VncSharePolicy was configured with +VNC_SHARE_POLICY_FORCE_SHARED mode and +multiple vnc clients opened vnc connections, some clients could go blank screen +at high probability. +This problem can be reproduced when we regularly reboot suse12sp3 in graphic +mode both +with RealVNC and noVNC client. + +Then we dig into it and find out that some clients go blank screen because they +don't +send FramebufferUpdateRequest any more. One step further, we notice that each +time +the job in flight is aborted one client go blank screen. + +The bug is triggered in the following procedure. +Guest reboot => graphic mode switch => graphic_hw_update => vga_update_display +=> vga_draw_graphic (full_update = 1) => dpy_gfx_replace_surface => +vnc_dpy_switch => +vnc_abort_display_jobs (client may have job in flight) => job removed from the +queue +If one client has vnc job in flight, *vnc_abort_display_jobs* will wait until +its job is abandoned. +This behavior is done in vnc_worker_thread_loop when 'if (job->vs->ioc == NULL +|| job->vs->abort == true)' +branch is taken. + +As we can see, *vnc_abort_display_jobs* is intended to do some optimization to +avoid unnecessary client update. +But if client sends FramebufferUpdateRequest for some graphic area and its +FramebufferUpdate response job +is abandoned, the client may wait for the response and never send new +FramebufferUpdateRequest, which may +case the client go blank screen forever. + +So I am wondering whether we should drop the *vnc_abort_display_jobs* +optimization or do some trick here +to push the client to send new FramebufferUpdateRequest. Do you have any idea ? 
+ diff --git a/results/classifier/016/debug/11933524 b/results/classifier/016/debug/11933524 new file mode 100644 index 00000000..9657808d --- /dev/null +++ b/results/classifier/016/debug/11933524 @@ -0,0 +1,1152 @@ +i386: 0.998 +x86: 0.946 +debug: 0.943 +kernel: 0.926 +boot: 0.907 +operating system: 0.652 +architecture: 0.061 +device: 0.024 +files: 0.023 +PID: 0.022 +TCG: 0.014 +register: 0.009 +assembly: 0.009 +semantic: 0.005 +hypervisor: 0.005 +virtual: 0.005 +peripherals: 0.003 +KVM: 0.003 +permissions: 0.003 +performance: 0.003 +alpha: 0.002 +risc-v: 0.002 +graphic: 0.001 +VMM: 0.001 +user-level: 0.001 +network: 0.001 +ppc: 0.001 +socket: 0.001 +mistranslation: 0.001 +vnc: 0.000 +arm: 0.000 + +[BUG] hw/i386/pc.c: CXL Fixed Memory Window should not reserve e820 in bios + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. + +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. 
+ +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. + +Signed-off-by: Gregory Price <gregory.price@memverge.com> +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; + + cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } +-- +2.37.3 + +Early-boot e820 records will be inserted by the bios/efi/early boot +software and be reported to the kernel via insert_resource. Later, when +CXL drivers iterate through the regions again, they will insert another +resource and make the RESERVED memory area a child. 
+ +This RESERVED memory area causes the memory region to become unusable, +and as a result attempting to create memory regions with + + `cxl create-region ...` + +Will fail due to the RESERVED area intersecting with the CXL window. + + +During boot the following traceback is observed: + +0xffffffff81101650 in insert_resource_expand_to_fit () +0xffffffff83d964c5 in e820__reserve_resources_late () +0xffffffff83e03210 in pcibios_resource_survey () +0xffffffff83e04f4a in pcibios_init () + +Which produces a call to reserve the CFMWS area: + +(gdb) p *new +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", + flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, + child = 0x0} + +Later the Kernel parses ACPI tables and reserves the exact same area as +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +retains the RESERVED region and makes it a child of the new region. + +0xffffffff811016a4 in insert_resource_conflict () + insert_resource () +0xffffffff81a81389 in cxl_parse_cfmws () +0xffffffff818c4a81 in call_handler () + acpi_parse_entries_array () + +(gdb) p/x *new +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", + flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, + child = 0x0} + +This produces the following output in /proc/iomem: + +590000000-68fffffff : CXL Window 0 + 590000000-68fffffff : Reserved + +This reserved area causes `get_free_mem_region()` to fail due to a check +against `__region_intersects()`. Due to this reserved area, the +intersect check will only ever return REGION_INTERSECTS, which causes +`cxl create-region` to always fail. 
+ +Signed-off-by: Gregory Price <gregory.price@memverge.com> +--- + hw/i386/pc.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 566accf7e6..5bf5465a21 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, + hwaddr cxl_size = MiB; +cxl_base = pc_get_cxl_range_start(pcms); +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); + memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); + memory_region_add_subregion(system_memory, cxl_base, mr); + cxl_resv_end = cxl_base + cxl_size; +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, + memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +fw, + "cxl-fixed-memory-region", fw->size); + memory_region_add_subregion(system_memory, fw->base, &fw->mr); +Or will this be subregion of cxl_base? + +Thanks, +Pankaj +- e820_add_entry(fw->base, fw->size, E820_RESERVED); + cxl_fmw_base += fw->size; + cxl_resv_end = cxl_fmw_base; + } + +> +> - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> memory_region_add_subregion(system_memory, cxl_base, mr); +> +> cxl_resv_end = cxl_base + cxl_size; +> +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> memory_region_init_io(&fw->mr, OBJECT(machine), +> +> &cfmws_ops, fw, +> +> "cxl-fixed-memory-region", +> +> fw->size); +> +> memory_region_add_subregion(system_memory, fw->base, +> +> &fw->mr); +> +> +Or will this be subregion of cxl_base? +> +> +Thanks, +> +Pankaj +The memory region backing this memory area still has to be initialized +and added in the QEMU system, but it will now be initialized for use by +linux after PCI/ACPI setup occurs and the CXL driver discovers it via +CDAT. + +It's also still possible to assign this area a static memory region at +bool by setting up the SRATs in the ACPI tables, but that patch is not +upstream yet. 
+ +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price <gourry.memverge@gmail.com> wrote: +> +> +Early-boot e820 records will be inserted by the bios/efi/early boot +> +software and be reported to the kernel via insert_resource. Later, when +> +CXL drivers iterate through the regions again, they will insert another +> +resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? + +> +> +This RESERVED memory area causes the memory region to become unusable, +> +and as a result attempting to create memory regions with +> +> +`cxl create-region ...` +> +> +Will fail due to the RESERVED area intersecting with the CXL window. +> +> +> +During boot the following traceback is observed: +> +> +0xffffffff81101650 in insert_resource_expand_to_fit () +> +0xffffffff83d964c5 in e820__reserve_resources_late () +> +0xffffffff83e03210 in pcibios_resource_survey () +> +0xffffffff83e04f4a in pcibios_init () +> +> +Which produces a call to reserve the CFMWS area: +> +> +(gdb) p *new +> +$54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +Later the Kernel parses ACPI tables and reserves the exact same area as +> +the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +retains the RESERVED region and makes it a child of the new region. 
+> +> +0xffffffff811016a4 in insert_resource_conflict () +> +insert_resource () +> +0xffffffff81a81389 in cxl_parse_cfmws () +> +0xffffffff818c4a81 in call_handler () +> +acpi_parse_entries_array () +> +> +(gdb) p/x *new +> +$59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +child = 0x0} +> +> +This produces the following output in /proc/iomem: +> +> +590000000-68fffffff : CXL Window 0 +> +590000000-68fffffff : Reserved +> +> +This reserved area causes `get_free_mem_region()` to fail due to a check +> +against `__region_intersects()`. Due to this reserved area, the +> +intersect check will only ever return REGION_INTERSECTS, which causes +> +`cxl create-region` to always fail. +> +> +Signed-off-by: Gregory Price <gregory.price@memverge.com> +> +--- +> +hw/i386/pc.c | 2 -- +> +1 file changed, 2 deletions(-) +> +> +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +index 566accf7e6..5bf5465a21 100644 +> +--- a/hw/i386/pc.c +> ++++ b/hw/i386/pc.c +> +@@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +hwaddr cxl_size = MiB; +> +> +cxl_base = pc_get_cxl_range_start(pcms); +> +- e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +memory_region_add_subregion(system_memory, cxl_base, mr); +> +cxl_resv_end = cxl_base + cxl_size; +> +@@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, +> +fw, +> +"cxl-fixed-memory-region", fw->size); +> +memory_region_add_subregion(system_memory, fw->base, +> +&fw->mr); +> +- e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +cxl_fmw_base += fw->size; +> +cxl_resv_end = cxl_fmw_base; +> +} +> +-- +> +2.37.3 +> + +This patch does not resolve the issue, reserved entries are still created. 
+[  0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +[  0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +# cat /proc/iomem +290000000-29fffffff : CXL Window 0 + 290000000-29fffffff : Reserved +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +cxl region: create_region: region0: set_size failed: Numerical result out of range +cxl region: cmd_create_region: created 0 regions +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha < +ani@anisinha.ca +> wrote: +On Tue, Oct 18, 2022 at 5:14 AM Gregory Price < +gourry.memverge@gmail.com +> wrote: +> +> Early-boot e820 records will be inserted by the bios/efi/early boot +> software and be reported to the kernel via insert_resource. Later, when +> CXL drivers iterate through the regions again, they will insert another +> resource and make the RESERVED memory area a child. +I have already sent a patch +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +When the patch is applied, there would not be any reserved entries +even with passing E820_RESERVED . +So this patch needs to be evaluated in the light of the above patch I +sent. Once you apply my patch, does the issue still exist? +> +> This RESERVED memory area causes the memory region to become unusable, +> and as a result attempting to create memory regions with +> +>   `cxl create-region ...` +> +> Will fail due to the RESERVED area intersecting with the CXL window. 
+> +> +> During boot the following traceback is observed: +> +> 0xffffffff81101650 in insert_resource_expand_to_fit () +> 0xffffffff83d964c5 in e820__reserve_resources_late () +> 0xffffffff83e03210 in pcibios_resource_survey () +> 0xffffffff83e04f4a in pcibios_init () +> +> Which produces a call to reserve the CFMWS area: +> +> (gdb) p *new +> $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +>    flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +>    child = 0x0} +> +> Later the Kernel parses ACPI tables and reserves the exact same area as +> the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> retains the RESERVED region and makes it a child of the new region. +> +> 0xffffffff811016a4 in insert_resource_conflict () +>            insert_resource () +> 0xffffffff81a81389 in cxl_parse_cfmws () +> 0xffffffff818c4a81 in call_handler () +>            acpi_parse_entries_array () +> +> (gdb) p/x *new +> $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +>    flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +>    child = 0x0} +> +> This produces the following output in /proc/iomem: +> +> 590000000-68fffffff : CXL Window 0 +>  590000000-68fffffff : Reserved +> +> This reserved area causes `get_free_mem_region()` to fail due to a check +> against `__region_intersects()`. Due to this reserved area, the +> intersect check will only ever return REGION_INTERSECTS, which causes +> `cxl create-region` to always fail. 
+> +> Signed-off-by: Gregory Price < +gregory.price@memverge.com +> +> --- +> hw/i386/pc.c | 2 -- +> 1 file changed, 2 deletions(-) +> +> diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> index 566accf7e6..5bf5465a21 100644 +> --- a/hw/i386/pc.c +> +++ b/hw/i386/pc.c +> @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +>     hwaddr cxl_size = MiB; +> +>     cxl_base = pc_get_cxl_range_start(pcms); +> -    e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +>     memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +>     memory_region_add_subregion(system_memory, cxl_base, mr); +>     cxl_resv_end = cxl_base + cxl_size; +> @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +>         memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, +>                    "cxl-fixed-memory-region", fw->size); +>         memory_region_add_subregion(system_memory, fw->base, &fw->mr); +> -        e820_add_entry(fw->base, fw->size, E820_RESERVED); +>         cxl_fmw_base += fw->size; +>         cxl_resv_end = cxl_fmw_base; +>       } +> -- +> 2.37.3 +> + ++Gerd Hoffmann + +On Tue, Oct 18, 2022 at 8:16 PM Gregory Price <gourry.memverge@gmail.com> wrote: +> +> +This patch does not resolve the issue, reserved entries are still created. 
+> +> +[ 0.000000] BIOS-e820: [mem 0x0000000280000000-0x00000002800fffff] reserved +> +[ 0.000000] BIOS-e820: [mem 0x0000000290000000-0x000000029fffffff] reserved +> +> +# cat /proc/iomem +> +290000000-29fffffff : CXL Window 0 +> +290000000-29fffffff : Reserved +> +> +# cxl create-region -m -d decoder0.0 -w 1 -g 256 mem0 +> +cxl region: create_region: region0: set_size failed: Numerical result out of +> +range +> +cxl region: cmd_create_region: created 0 regions +> +> +On Tue, Oct 18, 2022 at 2:05 AM Ani Sinha <ani@anisinha.ca> wrote: +> +> +> +> On Tue, Oct 18, 2022 at 5:14 AM Gregory Price <gourry.memverge@gmail.com> +> +> wrote: +> +> > +> +> > Early-boot e820 records will be inserted by the bios/efi/early boot +> +> > software and be reported to the kernel via insert_resource. Later, when +> +> > CXL drivers iterate through the regions again, they will insert another +> +> > resource and make the RESERVED memory area a child. +> +> +> +> I have already sent a patch +> +> +https://www.mail-archive.com/qemu-devel@nongnu.org/msg882012.html +. +> +> When the patch is applied, there would not be any reserved entries +> +> even with passing E820_RESERVED . +> +> So this patch needs to be evaluated in the light of the above patch I +> +> sent. Once you apply my patch, does the issue still exist? +> +> +> +> > +> +> > This RESERVED memory area causes the memory region to become unusable, +> +> > and as a result attempting to create memory regions with +> +> > +> +> > `cxl create-region ...` +> +> > +> +> > Will fail due to the RESERVED area intersecting with the CXL window. 
+> +> > +> +> > +> +> > During boot the following traceback is observed: +> +> > +> +> > 0xffffffff81101650 in insert_resource_expand_to_fit () +> +> > 0xffffffff83d964c5 in e820__reserve_resources_late () +> +> > 0xffffffff83e03210 in pcibios_resource_survey () +> +> > 0xffffffff83e04f4a in pcibios_init () +> +> > +> +> > Which produces a call to reserve the CFMWS area: +> +> > +> +> > (gdb) p *new +> +> > $54 = {start = 0x290000000, end = 0x2cfffffff, name = "Reserved", +> +> > flags = 0x200, desc = 0x7, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > Later the Kernel parses ACPI tables and reserves the exact same area as +> +> > the CXL Fixed Memory Window. The use of `insert_resource_conflict` +> +> > retains the RESERVED region and makes it a child of the new region. +> +> > +> +> > 0xffffffff811016a4 in insert_resource_conflict () +> +> > insert_resource () +> +> > 0xffffffff81a81389 in cxl_parse_cfmws () +> +> > 0xffffffff818c4a81 in call_handler () +> +> > acpi_parse_entries_array () +> +> > +> +> > (gdb) p/x *new +> +> > $59 = {start = 0x290000000, end = 0x2cfffffff, name = "CXL Window 0", +> +> > flags = 0x200, desc = 0x0, parent = 0x0, sibling = 0x0, +> +> > child = 0x0} +> +> > +> +> > This produces the following output in /proc/iomem: +> +> > +> +> > 590000000-68fffffff : CXL Window 0 +> +> > 590000000-68fffffff : Reserved +> +> > +> +> > This reserved area causes `get_free_mem_region()` to fail due to a check +> +> > against `__region_intersects()`. Due to this reserved area, the +> +> > intersect check will only ever return REGION_INTERSECTS, which causes +> +> > `cxl create-region` to always fail. 
+> +> > +> +> > Signed-off-by: Gregory Price <gregory.price@memverge.com> +> +> > --- +> +> > hw/i386/pc.c | 2 -- +> +> > 1 file changed, 2 deletions(-) +> +> > +> +> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > index 566accf7e6..5bf5465a21 100644 +> +> > --- a/hw/i386/pc.c +> +> > +++ b/hw/i386/pc.c +> +> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > hwaddr cxl_size = MiB; +> +> > +> +> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); +> +> > memory_region_add_subregion(system_memory, cxl_base, mr); +> +> > cxl_resv_end = cxl_base + cxl_size; +> +> > @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > memory_region_init_io(&fw->mr, OBJECT(machine), +> +> > &cfmws_ops, fw, +> +> > "cxl-fixed-memory-region", +> +> > fw->size); +> +> > memory_region_add_subregion(system_memory, fw->base, +> +> > &fw->mr); +> +> > - e820_add_entry(fw->base, fw->size, E820_RESERVED); +> +> > cxl_fmw_base += fw->size; +> +> > cxl_resv_end = cxl_fmw_base; +> +> > } +> +> > -- +> +> > 2.37.3 +> +> > + +> +>> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +>> > index 566accf7e6..5bf5465a21 100644 +> +>> > --- a/hw/i386/pc.c +> +>> > +++ b/hw/i386/pc.c +> +>> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +>> > hwaddr cxl_size = MiB; +> +>> > +> +>> > cxl_base = pc_get_cxl_range_start(pcms); +> +>> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +Just dropping it doesn't look like a good plan to me. + +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +window is placed above that address, i.e. this effectively reserves +address space. Right now used by memory hotplug code, but should work +for cxl too I think (disclaimer: don't know much about cxl ...). 
+ +take care & HTH, + Gerd + +On Tue, 8 Nov 2022 12:21:11 +0100 +Gerd Hoffmann <kraxel@redhat.com> wrote: + +> +> >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> >> > index 566accf7e6..5bf5465a21 100644 +> +> >> > --- a/hw/i386/pc.c +> +> >> > +++ b/hw/i386/pc.c +> +> >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> >> > hwaddr cxl_size = MiB; +> +> >> > +> +> >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +Just dropping it doesn't look like a good plan to me. +> +> +You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +(both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +window is placed above that address, i.e. this effectively reserves +> +address space. Right now used by memory hotplug code, but should work +> +for cxl too I think (disclaimer: don't know much about cxl ...). +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +at all, it' has its own mapping. + +Regardless of that, reserved E820 entries look wrong, and looking at +commit message OS is right to bailout on them (expected according +to ACPI spec). +Also spec says + +" +E820 Assumptions and Limitations + [...] + The platform boot firmware does not return a range description for the memory +mapping of + PCI devices, ISA Option ROMs, and ISA Plug and Play cards because the OS has +mechanisms + available to detect them. +" + +so dropping reserved entries looks reasonable from ACPI spec point of view. +(disclaimer: don't know much about cxl ... 
either) +> +> +take care & HTH, +> +Gerd +> + +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +On Tue, 8 Nov 2022 12:21:11 +0100 +> +Gerd Hoffmann <kraxel@redhat.com> wrote: +> +> +> > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > >> > index 566accf7e6..5bf5465a21 100644 +> +> > >> > --- a/hw/i386/pc.c +> +> > >> > +++ b/hw/i386/pc.c +> +> > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > >> > hwaddr cxl_size = MiB; +> +> > >> > +> +> > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> +> +> Just dropping it doesn't look like a good plan to me. +> +> +> +> You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> window is placed above that address, i.e. this effectively reserves +> +> address space. Right now used by memory hotplug code, but should work +> +> for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +at all, it' has its own mapping. +This should be changed. cxl should make sure the highest address used +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +resources there. + +> +so dropping reserved entries looks reasonable from ACPI spec point of view. +Yep, I don't want dispute that. + +I suspect the reason for these entries to exist in the first place is to +inform the firmware that it should not place stuff there, and if we +remove that to conform with the spec we need some alternative way for +that ... 
+ +take care, + Gerd + +On Fri, 11 Nov 2022 12:40:59 +0100 +Gerd Hoffmann <kraxel@redhat.com> wrote: + +> +On Fri, Nov 11, 2022 at 11:51:23AM +0100, Igor Mammedov wrote: +> +> On Tue, 8 Nov 2022 12:21:11 +0100 +> +> Gerd Hoffmann <kraxel@redhat.com> wrote: +> +> +> +> > > >> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c +> +> > > >> > index 566accf7e6..5bf5465a21 100644 +> +> > > >> > --- a/hw/i386/pc.c +> +> > > >> > +++ b/hw/i386/pc.c +> +> > > >> > @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, +> +> > > >> > hwaddr cxl_size = MiB; +> +> > > >> > +> +> > > >> > cxl_base = pc_get_cxl_range_start(pcms); +> +> > > >> > - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); +> +> > +> +> > Just dropping it doesn't look like a good plan to me. +> +> > +> +> > You can try set etc/reserved-memory-end fw_cfg file instead. Firmware +> +> > (both seabios and ovmf) read it and will make sure the 64bit pci mmio +> +> > window is placed above that address, i.e. this effectively reserves +> +> > address space. Right now used by memory hotplug code, but should work +> +> > for cxl too I think (disclaimer: don't know much about cxl ...). +> +> +> +> As far as I know CXL impl. in QEMU isn't using etc/reserved-memory-end +> +> at all, it' has its own mapping. +> +> +This should be changed. cxl should make sure the highest address used +> +is stored in etc/reserved-memory-end to avoid the firmware mapping pci +> +resources there. +if (pcmc->has_reserved_memory && machine->device_memory->base) { + +[...] + + if (pcms->cxl_devices_state.is_enabled) { + + res_mem_end = cxl_resv_end; + +that should be handled by this line + + } + + *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); + + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + + } + +so SeaBIOS shouldn't intrude into CXL address space +(I assume EDK2 behave similarly here) + +> +> so dropping reserved entries looks reasonable from ACPI spec point of view. 
+> +> +> +> +Yep, I don't want dispute that. +> +> +I suspect the reason for these entries to exist in the first place is to +> +inform the firmware that it should not place stuff there, and if we +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +just to educate me, can you point out what SeaBIOS code does with reservations. + +> +remove that to conform with the spec we need some alternative way for +> +that ... +with etc/reserved-memory-end set as above, +is E820_RESERVED really needed here? + +(my understanding was that E820_RESERVED weren't accounted for when +initializing PCI devices) + +> +> +take care, +> +Gerd +> + +> +if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +[...] +> +> +if (pcms->cxl_devices_state.is_enabled) { +> +> +res_mem_end = cxl_resv_end; +> +> +that should be handled by this line +> +> +} +> +> +*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +sizeof(*val)); +> +} +> +> +so SeaBIOS shouldn't intrude into CXL address space +Yes, looks good, so with this in place already everyting should be fine. + +> +(I assume EDK2 behave similarly here) +Correct, ovmf reads that fw_cfg file too. + +> +> I suspect the reason for these entries to exist in the first place is to +> +> inform the firmware that it should not place stuff there, and if we +> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +just to educate me, can you point out what SeaBIOS code does with +> +reservations. +They are added to the e820 map which gets passed on to the OS. seabios +uses (and updateas) the e820 map too, when allocating memory for +example. While thinking about it I'm not fully sure it actually looks +at reservations, maybe it only uses (and updates) ram entries when +allocating memory. + +> +> remove that to conform with the spec we need some alternative way for +> +> that ... +> +> +with etc/reserved-memory-end set as above, +> +is E820_RESERVED really needed here? +No. 
Setting etc/reserved-memory-end is enough. + +So for the original patch: +Acked-by: Gerd Hoffmann <kraxel@redhat.com> + +take care, + Gerd + +On Fri, Nov 11, 2022 at 02:36:02PM +0100, Gerd Hoffmann wrote: +> +> if (pcmc->has_reserved_memory && machine->device_memory->base) { +> +> +> +> [...] +> +> +> +> if (pcms->cxl_devices_state.is_enabled) { +> +> +> +> res_mem_end = cxl_resv_end; +> +> +> +> that should be handled by this line +> +> +> +> } +> +> +> +> *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); +> +> +> +> fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, +> +> sizeof(*val)); +> +> } +> +> +> +> so SeaBIOS shouldn't intrude into CXL address space +> +> +Yes, looks good, so with this in place already everyting should be fine. +> +> +> (I assume EDK2 behave similarly here) +> +> +Correct, ovmf reads that fw_cfg file too. +> +> +> > I suspect the reason for these entries to exist in the first place is to +> +> > inform the firmware that it should not place stuff there, and if we +> +> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +> +> just to educate me, can you point out what SeaBIOS code does with +> +> reservations. +> +> +They are added to the e820 map which gets passed on to the OS. seabios +> +uses (and updateas) the e820 map too, when allocating memory for +> +example. While thinking about it I'm not fully sure it actually looks +> +at reservations, maybe it only uses (and updates) ram entries when +> +allocating memory. +> +> +> > remove that to conform with the spec we need some alternative way for +> +> > that ... +> +> +> +> with etc/reserved-memory-end set as above, +> +> is E820_RESERVED really needed here? +> +> +No. Setting etc/reserved-memory-end is enough. +> +> +So for the original patch: +> +Acked-by: Gerd Hoffmann <kraxel@redhat.com> +> +> +take care, +> +Gerd +It's upstream already, sorry I can't add your tag. 
+ +-- +MST + diff --git a/results/classifier/016/debug/12360755 b/results/classifier/016/debug/12360755 new file mode 100644 index 00000000..63602fa3 --- /dev/null +++ b/results/classifier/016/debug/12360755 @@ -0,0 +1,323 @@ +debug: 0.952 +operating system: 0.778 +TCG: 0.737 +device: 0.733 +VMM: 0.625 +socket: 0.595 +vnc: 0.590 +network: 0.575 +boot: 0.569 +PID: 0.546 +virtual: 0.455 +peripherals: 0.448 +register: 0.414 +risc-v: 0.408 +kernel: 0.306 +alpha: 0.283 +hypervisor: 0.273 +semantic: 0.215 +mistranslation: 0.198 +files: 0.177 +assembly: 0.098 +performance: 0.084 +KVM: 0.076 +ppc: 0.056 +user-level: 0.043 +architecture: 0.034 +permissions: 0.020 +graphic: 0.010 +x86: 0.006 +i386: 0.005 +arm: 0.003 + +[Qemu-devel] [BUG] virtio-net linux driver fails to probe on MIPS Malta since 'hw/virtio-pci: fix virtio behaviour' + +Hi, + +I've bisected the following failure of the virtio_net linux v4.10 driver +to probe in QEMU v2.9.0-rc1 emulating a MIPS Malta machine: + +virtio_net virtio0: virtio: device uses modern interface but does not have +VIRTIO_F_VERSION_1 +virtio_net: probe of virtio0 failed with error -22 + +To QEMU commit 9a4c0e220d8a ("hw/virtio-pci: fix virtio behaviour"). + +It appears that adding ",disable-modern=on,disable-legacy=off" to the +virtio-net -device makes it work again. + +I presume this should really just work out of the box. Any ideas why it +isn't? + +Cheers +James +signature.asc +Description: +Digital signature + +On 03/17/2017 11:57 PM, James Hogan wrote: +Hi, + +I've bisected the following failure of the virtio_net linux v4.10 driver +to probe in QEMU v2.9.0-rc1 emulating a MIPS Malta machine: + +virtio_net virtio0: virtio: device uses modern interface but does not have +VIRTIO_F_VERSION_1 +virtio_net: probe of virtio0 failed with error -22 + +To QEMU commit 9a4c0e220d8a ("hw/virtio-pci: fix virtio behaviour"). + +It appears that adding ",disable-modern=on,disable-legacy=off" to the +virtio-net -device makes it work again. 
+ +I presume this should really just work out of the box. Any ideas why it +isn't? +Hi, + + +This is strange. This commit changes virtio devices from legacy to virtio +"transitional". +(your command line changes it to legacy) +Linux 4.10 supports virtio modern/transitional (as far as I know) and on QEMU +side +there is nothing new. + +Michael, do you have any idea? + +Thanks, +Marcel +Cheers +James + +On Mon, Mar 20, 2017 at 05:21:22PM +0200, Marcel Apfelbaum wrote: +> +On 03/17/2017 11:57 PM, James Hogan wrote: +> +> Hi, +> +> +> +> I've bisected the following failure of the virtio_net linux v4.10 driver +> +> to probe in QEMU v2.9.0-rc1 emulating a MIPS Malta machine: +> +> +> +> virtio_net virtio0: virtio: device uses modern interface but does not have +> +> VIRTIO_F_VERSION_1 +> +> virtio_net: probe of virtio0 failed with error -22 +> +> +> +> To QEMU commit 9a4c0e220d8a ("hw/virtio-pci: fix virtio behaviour"). +> +> +> +> It appears that adding ",disable-modern=on,disable-legacy=off" to the +> +> virtio-net -device makes it work again. +> +> +> +> I presume this should really just work out of the box. Any ideas why it +> +> isn't? +> +> +> +> +Hi, +> +> +> +This is strange. This commit changes virtio devices from legacy to virtio +> +"transitional". +> +(your command line changes it to legacy) +> +Linux 4.10 supports virtio modern/transitional (as far as I know) and on QEMU +> +side +> +there is nothing new. +> +> +Michael, do you have any idea? +> +> +Thanks, +> +Marcel +My guess would be firmware mishandling 64 bit BARs - we saw such +a case on sparc previously. As a result you are probably reading +all zeroes from features register or something like that. +Marcel, could you send a patch making the bar 32 bit? +If that helps we know what the issue is. + +> +> Cheers +> +> James +> +> + +On 03/20/2017 05:43 PM, Michael S. 
Tsirkin wrote: +On Mon, Mar 20, 2017 at 05:21:22PM +0200, Marcel Apfelbaum wrote: +On 03/17/2017 11:57 PM, James Hogan wrote: +Hi, + +I've bisected the following failure of the virtio_net linux v4.10 driver +to probe in QEMU v2.9.0-rc1 emulating a MIPS Malta machine: + +virtio_net virtio0: virtio: device uses modern interface but does not have +VIRTIO_F_VERSION_1 +virtio_net: probe of virtio0 failed with error -22 + +To QEMU commit 9a4c0e220d8a ("hw/virtio-pci: fix virtio behaviour"). + +It appears that adding ",disable-modern=on,disable-legacy=off" to the +virtio-net -device makes it work again. + +I presume this should really just work out of the box. Any ideas why it +isn't? +Hi, + + +This is strange. This commit changes virtio devices from legacy to virtio +"transitional". +(your command line changes it to legacy) +Linux 4.10 supports virtio modern/transitional (as far as I know) and on QEMU +side +there is nothing new. + +Michael, do you have any idea? + +Thanks, +Marcel +My guess would be firmware mishandling 64 bit BARs - we saw such +a case on sparc previously. As a result you are probably reading +all zeroes from features register or something like that. +Marcel, could you send a patch making the bar 32 bit? +If that helps we know what the issue is. +Sure, + +Thanks, +Marcel +Cheers +James + +On 03/20/2017 05:43 PM, Michael S. Tsirkin wrote: +On Mon, Mar 20, 2017 at 05:21:22PM +0200, Marcel Apfelbaum wrote: +On 03/17/2017 11:57 PM, James Hogan wrote: +Hi, + +I've bisected the following failure of the virtio_net linux v4.10 driver +to probe in QEMU v2.9.0-rc1 emulating a MIPS Malta machine: + +virtio_net virtio0: virtio: device uses modern interface but does not have +VIRTIO_F_VERSION_1 +virtio_net: probe of virtio0 failed with error -22 + +To QEMU commit 9a4c0e220d8a ("hw/virtio-pci: fix virtio behaviour"). + +It appears that adding ",disable-modern=on,disable-legacy=off" to the +virtio-net -device makes it work again. 
+ +I presume this should really just work out of the box. Any ideas why it +isn't? +Hi, + + +This is strange. This commit changes virtio devices from legacy to virtio +"transitional". +(your command line changes it to legacy) +Linux 4.10 supports virtio modern/transitional (as far as I know) and on QEMU +side +there is nothing new. + +Michael, do you have any idea? + +Thanks, +Marcel +My guess would be firmware mishandling 64 bit BARs - we saw such +a case on sparc previously. As a result you are probably reading +all zeroes from features register or something like that. +Marcel, could you send a patch making the bar 32 bit? +If that helps we know what the issue is. +Hi James, + +Can you please check if the below patch fixes the problem? +Please note it is not a solution. + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index f9b7244..5b4d429 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1671,9 +1671,7 @@ static void virtio_pci_device_plugged(DeviceState *d, +Error **errp) + } + + pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx, +- PCI_BASE_ADDRESS_SPACE_MEMORY | +- PCI_BASE_ADDRESS_MEM_PREFETCH | +- PCI_BASE_ADDRESS_MEM_TYPE_64, ++ PCI_BASE_ADDRESS_SPACE_MEMORY, + &proxy->modern_bar); + + proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap); + + +Thanks, +Marcel + +Hi Marcel, + +On Tue, Mar 21, 2017 at 04:16:58PM +0200, Marcel Apfelbaum wrote: +> +Can you please check if the below patch fixes the problem? +> +Please note it is not a solution. 
+> +> +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +> +index f9b7244..5b4d429 100644 +> +--- a/hw/virtio/virtio-pci.c +> ++++ b/hw/virtio/virtio-pci.c +> +@@ -1671,9 +1671,7 @@ static void virtio_pci_device_plugged(DeviceState *d, +> +Error **errp) +> +} +> +> +pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx, +> +- PCI_BASE_ADDRESS_SPACE_MEMORY | +> +- PCI_BASE_ADDRESS_MEM_PREFETCH | +> +- PCI_BASE_ADDRESS_MEM_TYPE_64, +> ++ PCI_BASE_ADDRESS_SPACE_MEMORY, +> +&proxy->modern_bar); +> +> +proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap); +Sorry for the delay trying this, I was away last week. + +No, it doesn't seem to make any difference. + +Thanks +James +signature.asc +Description: +Digital signature + diff --git a/results/classifier/016/debug/16228234 b/results/classifier/016/debug/16228234 new file mode 100644 index 00000000..83a7710c --- /dev/null +++ b/results/classifier/016/debug/16228234 @@ -0,0 +1,1871 @@ +debug: 0.880 +hypervisor: 0.677 +virtual: 0.554 +files: 0.246 +network: 0.159 +PID: 0.075 +TCG: 0.062 +assembly: 0.054 +operating system: 0.049 +device: 0.033 +x86: 0.031 +register: 0.027 +performance: 0.019 +user-level: 0.013 +KVM: 0.011 +ppc: 0.010 +semantic: 0.010 +socket: 0.007 +risc-v: 0.007 +kernel: 0.006 +i386: 0.006 +alpha: 0.005 +architecture: 0.004 +VMM: 0.004 +arm: 0.003 +vnc: 0.003 +permissions: 0.002 +peripherals: 0.002 +graphic: 0.002 +boot: 0.001 +mistranslation: 0.000 + +[Qemu-devel] [Bug?] 
BQL about live migration + +Hello Juan & Dave, + +We hit a bug in our test: +Network error occurs when migrating a guest, libvirt then rollback the +migration, causes qemu coredump +qemu log: +2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": "STOP"} +2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: + qmp_cmd_name: migrate_cancel +2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +"MIGRATION", "data": {"status": "cancelling"}} +2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: + qmp_cmd_name: cont +2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-balloon device status is 7 that means DRIVER OK +2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-net device status is 7 that means DRIVER OK +2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-blk device status is 7 that means DRIVER OK +2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: + virtio-serial device status is 7 that means DRIVER OK +2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +vm_state-notify:3ms +2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +"RESUME"} +2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: + this iteration cycle takes 3s, new dirtied data:0MB +2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: + {"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +"MIGRATION_PASS", "data": {"pass": 3}} 
+2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +(131583/18446744073709551615) +qemu-kvm: /home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +virtio_net_save: Assertion `!n->vhost_started' failed. +2017-03-01 12:54:43.028: shutting down + +> +From qemu log, qemu received and processed migrate_cancel/cont qmp commands +after guest been stopped and entered the last round of migration. Then +migration thread try to save device state when guest is running(started by +cont command), causes assert and coredump. +This is because in last iter, we call cpu_synchronize_all_states() to +synchronize vcpu states, this call will release qemu_global_mutex and wait +for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +(gdb) bt +#0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +/lib64/libpthread.so.0 +#1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 <qemu_work_cond>, +mutex=0x7f764445eba0 <qemu_global_mutex>) at util/qemu-thread-posix.c:132 +#2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, func=0x7f7643a46413 +<do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +/mnt/public/yanghy/qemu-kvm/cpus.c:995 +#3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +/mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +#4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +/mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +#5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +/mnt/public/yanghy/qemu-kvm/cpus.c:766 +#6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy (f=0x7f76462f2d30, +iterable_only=false) at /mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +#7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +<current_migration.37571>, current_active_state=4, +old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +migration/migration.c:1753 +#8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 
+<current_migration.37571>) at migration/migration.c:1922
+#9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0
+#10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6
+(gdb) p iothread_locked
+$1 = true
+
+and then, qemu main thread been executed, it won't block because migration
+thread released the qemu_global_mutex:
+(gdb) thr 1
+[Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))]
+#0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270
+270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout
+%d\n", timeout);
+(gdb) p iothread_locked
+$2 = true
+(gdb) l 268
+263
+264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len,
+timeout);
+265
+266
+267 if (timeout) {
+268 qemu_mutex_lock_iothread();
+269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout
+%d\n",
+271 }
+272 }
+(gdb)
+
+So, although we've hold iothread_lock in stop & copy phase of migration, we
+can't guarantee the iothread been locked all through the stop & copy phase,
+any thoughts on how to solve this problem? 
+ + +Thanks, +-Gonglei + +On Fri, 03/03 09:29, Gonglei (Arei) wrote: +> +Hello Juan & Dave, +> +> +We hit a bug in our test: +> +Network error occurs when migrating a guest, libvirt then rollback the +> +migration, causes qemu coredump +> +qemu log: +> +2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": +> +"STOP"} +> +2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: migrate_cancel +> +2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +> +"MIGRATION", "data": {"status": "cancelling"}} +> +2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: cont +> +2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-balloon device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-net device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-blk device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-serial device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +> +vm_state-notify:3ms +> +2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +> +"RESUME"} +> +2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: +> +this iteration cycle takes 3s, new dirtied data:0MB +> +2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> 
+{"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +> +"MIGRATION_PASS", "data": {"pass": 3}} +> +2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +> +(131583/18446744073709551615) +> +qemu-kvm: +> +/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +> +virtio_net_save: Assertion `!n->vhost_started' failed. +> +2017-03-01 12:54:43.028: shutting down +> +> +From qemu log, qemu received and processed migrate_cancel/cont qmp commands +> +after guest been stopped and entered the last round of migration. Then +> +migration thread try to save device state when guest is running(started by +> +cont command), causes assert and coredump. +> +This is because in last iter, we call cpu_synchronize_all_states() to +> +synchronize vcpu states, this call will release qemu_global_mutex and wait +> +for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +> +(gdb) bt +> +#0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +> +/lib64/libpthread.so.0 +> +#1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 +> +<qemu_work_cond>, mutex=0x7f764445eba0 <qemu_global_mutex>) at +> +util/qemu-thread-posix.c:132 +> +#2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, func=0x7f7643a46413 +> +<do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:995 +> +#3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +> +#4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +> +#5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:766 +> +#6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy +> +(f=0x7f76462f2d30, iterable_only=false) at +> +/mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +> +#7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +> 
+<current_migration.37571>, current_active_state=4, +> +old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +> +migration/migration.c:1753 +> +#8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +> +<current_migration.37571>) at migration/migration.c:1922 +> +#9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +> +#10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +> +(gdb) p iothread_locked +> +$1 = true +> +> +and then, qemu main thread been executed, it won't block because migration +> +thread released the qemu_global_mutex: +> +(gdb) thr 1 +> +[Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +> +#0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270 +> +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +%d\n", timeout); +> +(gdb) p iothread_locked +> +$2 = true +> +(gdb) l 268 +> +263 +> +264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, +> +timeout); +> +265 +> +266 +> +267 if (timeout) { +> +268 qemu_mutex_lock_iothread(); +> +269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { +> +270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +%d\n", timeout); +> +271 } +> +272 } +> +(gdb) +> +> +So, although we've hold iothread_lock in stop© phase of migration, we +> +can't guarantee the iothread been locked all through the stop & copy phase, +> +any thoughts on how to solve this problem? +Could you post a backtrace of the assertion? 
+ +Fam + +On 2017/3/3 18:42, Fam Zheng wrote: +> +On Fri, 03/03 09:29, Gonglei (Arei) wrote: +> +> Hello Juan & Dave, +> +> +> +> We hit a bug in our test: +> +> Network error occurs when migrating a guest, libvirt then rollback the +> +> migration, causes qemu coredump +> +> qemu log: +> +> 2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": +> +> "STOP"} +> +> 2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +> qmp_cmd_name: migrate_cancel +> +> 2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +> +> "MIGRATION", "data": {"status": "cancelling"}} +> +> 2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +> qmp_cmd_name: cont +> +> 2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-balloon device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-net device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-blk device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +> virtio-serial device status is 7 that means DRIVER OK +> +> 2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +> +> vm_state-notify:3ms +> +> 2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +> +> "RESUME"} +> +> 2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: +> +> this iteration cycle takes 3s, new dirtied data:0MB +> +> 
2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +> {"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +> +> "MIGRATION_PASS", "data": {"pass": 3}} +> +> 2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +> +> (131583/18446744073709551615) +> +> qemu-kvm: +> +> /home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +> +> virtio_net_save: Assertion `!n->vhost_started' failed. +> +> 2017-03-01 12:54:43.028: shutting down +> +> +> +> From qemu log, qemu received and processed migrate_cancel/cont qmp commands +> +> after guest been stopped and entered the last round of migration. Then +> +> migration thread try to save device state when guest is running(started by +> +> cont command), causes assert and coredump. +> +> This is because in last iter, we call cpu_synchronize_all_states() to +> +> synchronize vcpu states, this call will release qemu_global_mutex and wait +> +> for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +> +> (gdb) bt +> +> #0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +> +> /lib64/libpthread.so.0 +> +> #1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 +> +> <qemu_work_cond>, mutex=0x7f764445eba0 <qemu_global_mutex>) at +> +> util/qemu-thread-posix.c:132 +> +> #2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, +> +> func=0x7f7643a46413 <do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +> +> /mnt/public/yanghy/qemu-kvm/cpus.c:995 +> +> #3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +> /mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +> +> #4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +> /mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +> +> #5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +> +> /mnt/public/yanghy/qemu-kvm/cpus.c:766 +> +> #6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy +> +> (f=0x7f76462f2d30, 
iterable_only=false) at +> +> /mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +> +> #7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +> +> <current_migration.37571>, current_active_state=4, +> +> old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +> +> migration/migration.c:1753 +> +> #8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +> +> <current_migration.37571>) at migration/migration.c:1922 +> +> #9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +> +> #10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +> +> (gdb) p iothread_locked +> +> $1 = true +> +> +> +> and then, qemu main thread been executed, it won't block because migration +> +> thread released the qemu_global_mutex: +> +> (gdb) thr 1 +> +> [Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +> +> #0 os_host_main_loop_wait (timeout=931565) at main-loop.c:270 +> +> 270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +> %d\n", timeout); +> +> (gdb) p iothread_locked +> +> $2 = true +> +> (gdb) l 268 +> +> 263 +> +> 264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, +> +> timeout); +> +> 265 +> +> 266 +> +> 267 if (timeout) { +> +> 268 qemu_mutex_lock_iothread(); +> +> 269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { +> +> 270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout +> +> %d\n", timeout); +> +> 271 } +> +> 272 } +> +> (gdb) +> +> +> +> So, although we've hold iothread_lock in stop© phase of migration, we +> +> can't guarantee the iothread been locked all through the stop & copy phase, +> +> any thoughts on how to solve this problem? +> +> +Could you post a backtrace of the assertion? 
+#0 0x00007f97b1fbe5d7 in raise () from /usr/lib64/libc.so.6 +#1 0x00007f97b1fbfcc8 in abort () from /usr/lib64/libc.so.6 +#2 0x00007f97b1fb7546 in __assert_fail_base () from /usr/lib64/libc.so.6 +#3 0x00007f97b1fb75f2 in __assert_fail () from /usr/lib64/libc.so.6 +#4 0x000000000049fd19 in virtio_net_save (f=0x7f97a8ca44d0, +opaque=0x7f97a86e9018) at /usr/src/debug/qemu-kvm-2.6.0/hw/ +#5 0x000000000047e380 in vmstate_save_old_style (address@hidden, +address@hidden, se=0x7f9 +#6 0x000000000047fb93 in vmstate_save (address@hidden, address@hidden, +address@hidden +#7 0x0000000000481ad2 in qemu_savevm_state_complete_precopy (f=0x7f97a8ca44d0, +address@hidden) +#8 0x00000000006c6b60 in migration_completion (address@hidden +<current_migration.38312>, current_active_state=curre + address@hidden) at migration/migration.c:1761 +#9 0x00000000006c71db in migration_thread (address@hidden +<current_migration.38312>) at migration/migrati + +> +> +Fam +> +-- +Thanks, +Yang + +* Gonglei (Arei) (address@hidden) wrote: +> +Hello Juan & Dave, +cc'ing in pbonzini since it's magic involving cpu_synrhonize_all_states() + +> +We hit a bug in our test: +> +Network error occurs when migrating a guest, libvirt then rollback the +> +migration, causes qemu coredump +> +qemu log: +> +2017-03-01T12:54:33.904949+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344073, "microseconds": 904914}, "event": +> +"STOP"} +> +2017-03-01T12:54:37.522500+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: migrate_cancel +> +2017-03-01T12:54:37.522607+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 522556}, "event": +> +"MIGRATION", "data": {"status": "cancelling"}} +> +2017-03-01T12:54:37.524671+08:00|info|qemu[17672]|[17672]|handle_qmp_command[3930]|: +> +qmp_cmd_name: cont +> +2017-03-01T12:54:37.524733+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> 
+virtio-balloon device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525434+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-net device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525484+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-blk device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.525562+08:00|info|qemu[17672]|[17672]|virtio_set_status[725]|: +> +virtio-serial device status is 7 that means DRIVER OK +> +2017-03-01T12:54:37.527653+08:00|info|qemu[17672]|[17672]|vm_start[981]|: +> +vm_state-notify:3ms +> +2017-03-01T12:54:37.528523+08:00|info|qemu[17672]|[17672]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 527699}, "event": +> +"RESUME"} +> +2017-03-01T12:54:37.530680+08:00|info|qemu[17672]|[33614]|migration_bitmap_sync[720]|: +> +this iteration cycle takes 3s, new dirtied data:0MB +> +2017-03-01T12:54:37.530909+08:00|info|qemu[17672]|[33614]|monitor_qapi_event_emit[479]|: +> +{"timestamp": {"seconds": 1488344077, "microseconds": 530733}, "event": +> +"MIGRATION_PASS", "data": {"pass": 3}} +> +2017-03-01T04:54:37.530997Z qemu-kvm: socket_writev_buffer: Got err=32 for +> +(131583/18446744073709551615) +> +qemu-kvm: +> +/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/net/virtio_net.c:1519: +> +virtio_net_save: Assertion `!n->vhost_started' failed. +> +2017-03-01 12:54:43.028: shutting down +> +> +From qemu log, qemu received and processed migrate_cancel/cont qmp commands +> +after guest been stopped and entered the last round of migration. Then +> +migration thread try to save device state when guest is running(started by +> +cont command), causes assert and coredump. 
+> +This is because in last iter, we call cpu_synchronize_all_states() to +> +synchronize vcpu states, this call will release qemu_global_mutex and wait +> +for do_kvm_cpu_synchronize_state() to be executed on target vcpu: +> +(gdb) bt +> +#0 0x00007f763d1046d5 in pthread_cond_wait@@GLIBC_2.3.2 () from +> +/lib64/libpthread.so.0 +> +#1 0x00007f7643e51d7f in qemu_cond_wait (cond=0x7f764445eca0 +> +<qemu_work_cond>, mutex=0x7f764445eba0 <qemu_global_mutex>) at +> +util/qemu-thread-posix.c:132 +> +#2 0x00007f7643a2e154 in run_on_cpu (cpu=0x7f7644e06d80, func=0x7f7643a46413 +> +<do_kvm_cpu_synchronize_state>, data=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:995 +> +#3 0x00007f7643a46487 in kvm_cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/kvm-all.c:1805 +> +#4 0x00007f7643a2c700 in cpu_synchronize_state (cpu=0x7f7644e06d80) at +> +/mnt/public/yanghy/qemu-kvm/include/sysemu/kvm.h:457 +> +#5 0x00007f7643a2db0c in cpu_synchronize_all_states () at +> +/mnt/public/yanghy/qemu-kvm/cpus.c:766 +> +#6 0x00007f7643a67b5b in qemu_savevm_state_complete_precopy +> +(f=0x7f76462f2d30, iterable_only=false) at +> +/mnt/public/yanghy/qemu-kvm/migration/savevm.c:1051 +> +#7 0x00007f7643d121e9 in migration_completion (s=0x7f76443e78c0 +> +<current_migration.37571>, current_active_state=4, +> +old_vm_running=0x7f74343fda00, start_time=0x7f74343fda08) at +> +migration/migration.c:1753 +> +#8 0x00007f7643d126c5 in migration_thread (opaque=0x7f76443e78c0 +> +<current_migration.37571>) at migration/migration.c:1922 +> +#9 0x00007f763d100dc5 in start_thread () from /lib64/libpthread.so.0 +> +#10 0x00007f763ce2e71d in clone () from /lib64/libc.so.6 +> +(gdb) p iothread_locked +> +$1 = true +> +> +and then, qemu main thread been executed, it won't block because migration +> +thread released the qemu_global_mutex: +> +(gdb) thr 1 +> +[Switching to thread 1 (Thread 0x7fe298e08bc0 (LWP 30767))] +> +#0 os_host_main_loop_wait (timeout=931565) at 
main-loop.c:270 
+> 
+270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout 
+> 
+%d\n", timeout); 
+> 
+(gdb) p iothread_locked 
+> 
+$2 = true 
+> 
+(gdb) l 268 
+> 
+263 
+> 
+264 ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, 
+> 
+timeout); 
+> 
+265 
+> 
+266 
+> 
+267 if (timeout) { 
+> 
+268 qemu_mutex_lock_iothread(); 
+> 
+269 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { 
+> 
+270 QEMU_LOG(LOG_INFO,"***** after qemu_pool_ns: timeout 
+> 
+%d\n", timeout); 
+> 
+271 } 
+> 
+272 } 
+> 
+(gdb) 
+> 
+> 
+So, although we've hold iothread_lock in stop & copy phase of migration, we 
+> 
+can't guarantee the iothread been locked all through the stop & copy phase, 
+> 
+any thoughts on how to solve this problem? 
+Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that 
+there were times when run_on_cpu would have to drop the BQL and I worried about 
+it, 
+but this is the 1st time I've seen an error due to it. 
+ 
+Do you know what the migration state was at that point? Was it 
+MIGRATION_STATUS_CANCELLING? 
+I'm thinking perhaps we should stop 'cont' from continuing while migration is in 
+MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - so 
+that 
+perhaps libvirt could avoid sending the 'cont' until then? 
+
+Dave 
+
+
+> 
+> 
+Thanks, 
+> 
+-Gonglei 
+> 
+-- 
+Dr. David Alan Gilbert / address@hidden / Manchester, UK 
+
+On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: 
+> 
+Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that 
+> 
+there were times when run_on_cpu would have to drop the BQL and I worried 
+> 
+about it, 
+> 
+but this is the 1st time I've seen an error due to it. 
+> 
+> 
+Do you know what the migration state was at that point? Was it 
+> 
+MIGRATION_STATUS_CANCELLING? 
+> 
+I'm thinking perhaps we should stop 'cont' from continuing while migration is 
+> 
+in 
+> 
+MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - so 
+> 
+that 
+> 
+perhaps libvirt could avoid sending the 'cont' until then? 
+No, there's no event, though I thought libvirt would poll until +"query-migrate" returns the cancelled state. Of course that is a small +consolation, because a segfault is unacceptable. + +One possibility is to suspend the monitor in qmp_migrate_cancel and +resume it (with add_migration_state_change_notifier) when we hit the +CANCELLED state. I'm not sure what the latency would be between the end +of migrate_fd_cancel and finally reaching CANCELLED. + +Paolo + +* Paolo Bonzini (address@hidden) wrote: +> +> +> +On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that +> +> their were times when run_on_cpu would have to drop the BQL and I worried +> +> about it, +> +> but this is the 1st time I've seen an error due to it. +> +> +> +> Do you know what the migration state was at that point? Was it +> +> MIGRATION_STATUS_CANCELLING? +> +> I'm thinking perhaps we should stop 'cont' from continuing while migration +> +> is in +> +> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +> so that +> +> perhaps libvirt could avoid sending the 'cont' until then? +> +> +No, there's no event, though I thought libvirt would poll until +> +"query-migrate" returns the cancelled state. Of course that is a small +> +consolation, because a segfault is unacceptable. +I think you might get an event if you set the new migrate capability called +'events' on! + +void migrate_set_state(int *state, int old_state, int new_state) +{ + if (atomic_cmpxchg(state, old_state, new_state) == old_state) { + trace_migrate_set_state(new_state); + migrate_generate_event(new_state); + } +} + +static void migrate_generate_event(int new_state) +{ + if (migrate_use_events()) { + qapi_event_send_migration(new_state, &error_abort); + } +} + +That event feature went in sometime after 2.3.0. 
+ +> +One possibility is to suspend the monitor in qmp_migrate_cancel and +> +resume it (with add_migration_state_change_notifier) when we hit the +> +CANCELLED state. I'm not sure what the latency would be between the end +> +of migrate_fd_cancel and finally reaching CANCELLED. +I don't like suspending monitors; it can potentially take quite a significant +time to do a cancel. +How about making 'cont' fail if we're in CANCELLING? + +I'd really love to see the 'run_on_cpu' being more careful about the BQL; +we really need all of the rest of the devices to stay quiesced at times. + +Dave + +> +Paolo +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +* Paolo Bonzini (address@hidden) wrote: +> +> +> +> +> +> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago that +> +>> their were times when run_on_cpu would have to drop the BQL and I worried +> +>> about it, +> +>> but this is the 1st time I've seen an error due to it. +> +>> +> +>> Do you know what the migration state was at that point? Was it +> +>> MIGRATION_STATUS_CANCELLING? +> +>> I'm thinking perhaps we should stop 'cont' from continuing while migration +> +>> is in +> +>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +>> so that +> +>> perhaps libvirt could avoid sending the 'cont' until then? +> +> +> +> No, there's no event, though I thought libvirt would poll until +> +> "query-migrate" returns the cancelled state. Of course that is a small +> +> consolation, because a segfault is unacceptable. +> +> +I think you might get an event if you set the new migrate capability called +> +'events' on! 
+> +> +void migrate_set_state(int *state, int old_state, int new_state) +> +{ +> +if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +trace_migrate_set_state(new_state); +> +migrate_generate_event(new_state); +> +} +> +} +> +> +static void migrate_generate_event(int new_state) +> +{ +> +if (migrate_use_events()) { +> +qapi_event_send_migration(new_state, &error_abort); +> +} +> +} +> +> +That event feature went in sometime after 2.3.0. +> +> +> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +> resume it (with add_migration_state_change_notifier) when we hit the +> +> CANCELLED state. I'm not sure what the latency would be between the end +> +> of migrate_fd_cancel and finally reaching CANCELLED. +> +> +I don't like suspending monitors; it can potentially take quite a significant +> +time to do a cancel. +> +How about making 'cont' fail if we're in CANCELLING? +Actually I thought that would be the case already (in fact CANCELLING is +internal only; the outside world sees it as "active" in query-migrate). + +Lei, what is the runstate? (That is, why did cont succeed at all)? + +Paolo + +> +I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +we really need all of the rest of the devices to stay quiesced at times. +That's not really possible, because of how condition variables work. :( + +* Paolo Bonzini (address@hidden) wrote: +> +> +> +On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +> * Paolo Bonzini (address@hidden) wrote: +> +>> +> +>> +> +>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago +> +>>> that +> +>>> their were times when run_on_cpu would have to drop the BQL and I worried +> +>>> about it, +> +>>> but this is the 1st time I've seen an error due to it. +> +>>> +> +>>> Do you know what the migration state was at that point? Was it +> +>>> MIGRATION_STATUS_CANCELLING? 
+> +>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +>>> migration is in +> +>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +>>> so that +> +>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>> +> +>> No, there's no event, though I thought libvirt would poll until +> +>> "query-migrate" returns the cancelled state. Of course that is a small +> +>> consolation, because a segfault is unacceptable. +> +> +> +> I think you might get an event if you set the new migrate capability called +> +> 'events' on! +> +> +> +> void migrate_set_state(int *state, int old_state, int new_state) +> +> { +> +> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +> trace_migrate_set_state(new_state); +> +> migrate_generate_event(new_state); +> +> } +> +> } +> +> +> +> static void migrate_generate_event(int new_state) +> +> { +> +> if (migrate_use_events()) { +> +> qapi_event_send_migration(new_state, &error_abort); +> +> } +> +> } +> +> +> +> That event feature went in sometime after 2.3.0. +> +> +> +>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>> resume it (with add_migration_state_change_notifier) when we hit the +> +>> CANCELLED state. I'm not sure what the latency would be between the end +> +>> of migrate_fd_cancel and finally reaching CANCELLED. +> +> +> +> I don't like suspending monitors; it can potentially take quite a +> +> significant +> +> time to do a cancel. +> +> How about making 'cont' fail if we're in CANCELLING? +> +> +Actually I thought that would be the case already (in fact CANCELLING is +> +internal only; the outside world sees it as "active" in query-migrate). +> +> +Lei, what is the runstate? (That is, why did cont succeed at all)? +I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the device +save, and that's what we get at the end of a migrate and it's legal to restart +from there. 
+ +> +Paolo +> +> +> I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +> we really need all of the rest of the devices to stay quiesced at times. +> +> +That's not really possible, because of how condition variables work. :( +*Really* we need to find a solution to that - there's probably lots of +other things that can spring up in that small window other than the +'cont'. + +Dave + +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 03/03/2017 14:26, Dr. David Alan Gilbert wrote: +> +* Paolo Bonzini (address@hidden) wrote: +> +> +> +> +> +> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +>> * Paolo Bonzini (address@hidden) wrote: +> +>>> +> +>>> +> +>>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago +> +>>>> that +> +>>>> their were times when run_on_cpu would have to drop the BQL and I worried +> +>>>> about it, +> +>>>> but this is the 1st time I've seen an error due to it. +> +>>>> +> +>>>> Do you know what the migration state was at that point? Was it +> +>>>> MIGRATION_STATUS_CANCELLING? +> +>>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +>>>> migration is in +> +>>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED - +> +>>>> so that +> +>>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>>> +> +>>> No, there's no event, though I thought libvirt would poll until +> +>>> "query-migrate" returns the cancelled state. Of course that is a small +> +>>> consolation, because a segfault is unacceptable. +> +>> +> +>> I think you might get an event if you set the new migrate capability called +> +>> 'events' on! 
+> +>> +> +>> void migrate_set_state(int *state, int old_state, int new_state) +> +>> { +> +>> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +>> trace_migrate_set_state(new_state); +> +>> migrate_generate_event(new_state); +> +>> } +> +>> } +> +>> +> +>> static void migrate_generate_event(int new_state) +> +>> { +> +>> if (migrate_use_events()) { +> +>> qapi_event_send_migration(new_state, &error_abort); +> +>> } +> +>> } +> +>> +> +>> That event feature went in sometime after 2.3.0. +> +>> +> +>>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>>> resume it (with add_migration_state_change_notifier) when we hit the +> +>>> CANCELLED state. I'm not sure what the latency would be between the end +> +>>> of migrate_fd_cancel and finally reaching CANCELLED. +> +>> +> +>> I don't like suspending monitors; it can potentially take quite a +> +>> significant +> +>> time to do a cancel. +> +>> How about making 'cont' fail if we're in CANCELLING? +> +> +> +> Actually I thought that would be the case already (in fact CANCELLING is +> +> internal only; the outside world sees it as "active" in query-migrate). +> +> +> +> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the device +> +save, and that's what we get at the end of a migrate and it's legal to restart +> +from there. +Yeah, but I think we get there at the end of a failed migrate only. So +perhaps we can introduce a new state RUN_STATE_FAILED_MIGRATE and forbid +"cont" from finish-migrate (only allow it from failed-migrate)? + +Paolo + +> +> Paolo +> +> +> +>> I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +>> we really need all of the rest of the devices to stay quiesced at times. +> +> +> +> That's not really possible, because of how condition variables work. 
:( +> +> +*Really* we need to find a solution to that - there's probably lots of +> +other things that can spring up in that small window other than the +> +'cont'. +> +> +Dave +> +> +-- +> +Dr. David Alan Gilbert / address@hidden / Manchester, UK +> + +Hi Paolo, + +On Fri, Mar 3, 2017 at 9:33 PM, Paolo Bonzini <address@hidden> wrote: + +> +> +> +On 03/03/2017 14:26, Dr. David Alan Gilbert wrote: +> +> * Paolo Bonzini (address@hidden) wrote: +> +>> +> +>> +> +>> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +>>> * Paolo Bonzini (address@hidden) wrote: +> +>>>> +> +>>>> +> +>>>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while +> +ago that +> +>>>>> their were times when run_on_cpu would have to drop the BQL and I +> +worried about it, +> +>>>>> but this is the 1st time I've seen an error due to it. +> +>>>>> +> +>>>>> Do you know what the migration state was at that point? Was it +> +MIGRATION_STATUS_CANCELLING? +> +>>>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +migration is in +> +>>>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit +> +CANCELLED - so that +> +>>>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>>>> +> +>>>> No, there's no event, though I thought libvirt would poll until +> +>>>> "query-migrate" returns the cancelled state. Of course that is a +> +small +> +>>>> consolation, because a segfault is unacceptable. +> +>>> +> +>>> I think you might get an event if you set the new migrate capability +> +called +> +>>> 'events' on! 
+> +>>> +> +>>> void migrate_set_state(int *state, int old_state, int new_state) +> +>>> { +> +>>> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +>>> trace_migrate_set_state(new_state); +> +>>> migrate_generate_event(new_state); +> +>>> } +> +>>> } +> +>>> +> +>>> static void migrate_generate_event(int new_state) +> +>>> { +> +>>> if (migrate_use_events()) { +> +>>> qapi_event_send_migration(new_state, &error_abort); +> +>>> } +> +>>> } +> +>>> +> +>>> That event feature went in sometime after 2.3.0. +> +>>> +> +>>>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>>>> resume it (with add_migration_state_change_notifier) when we hit the +> +>>>> CANCELLED state. I'm not sure what the latency would be between the +> +end +> +>>>> of migrate_fd_cancel and finally reaching CANCELLED. +> +>>> +> +>>> I don't like suspending monitors; it can potentially take quite a +> +significant +> +>>> time to do a cancel. +> +>>> How about making 'cont' fail if we're in CANCELLING? +> +>> +> +>> Actually I thought that would be the case already (in fact CANCELLING is +> +>> internal only; the outside world sees it as "active" in query-migrate). +> +>> +> +>> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +> +> I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the +> +device +> +> save, and that's what we get at the end of a migrate and it's legal to +> +restart +> +> from there. +> +> +Yeah, but I think we get there at the end of a failed migrate only. So +> +perhaps we can introduce a new state RUN_STATE_FAILED_MIGRATE +I think we do not need to introduce a new state here. If we hit 'cont' and +the run state is RUN_STATE_FINISH_MIGRATE, we could assume that +migration failed because 'RUN_STATE_FINISH_MIGRATE' only exists on +source side, means we are finishing migration, a 'cont' at the meantime +indicates that we are rolling back, otherwise source side should be +destroyed. 
+ + +> +and forbid +> +"cont" from finish-migrate (only allow it from failed-migrate)? +> +The problem of forbid 'cont' here is that it will result in a failed +migration and the source +side will remain paused. We actually expect a usable guest when rollback. +Is there a way to kill migration thread when we're under main thread, if +there is, we +could do the following to solve this problem: +1. 'cont' received during runstate RUN_STATE_FINISH_MIGRATE +2. kill migration thread +3. vm_start() + +But this only solves 'cont' problem. As Dave said before, other things could +happen during the small windows while we are finishing migration, that's +what I was worried about... + + +> +Paolo +> +> +>> Paolo +> +>> +> +>>> I'd really love to see the 'run_on_cpu' being more careful about the +> +BQL; +> +>>> we really need all of the rest of the devices to stay quiesced at +> +times. +> +>> +> +>> That's not really possible, because of how condition variables work. :( +> +> +> +> *Really* we need to find a solution to that - there's probably lots of +> +> other things that can spring up in that small window other than the +> +> 'cont'. +> +> +> +> Dave +> +> +> +> -- +> +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +> +> + +* Paolo Bonzini (address@hidden) wrote: +> +> +> +On 03/03/2017 14:26, Dr. David Alan Gilbert wrote: +> +> * Paolo Bonzini (address@hidden) wrote: +> +>> +> +>> +> +>> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +>>> * Paolo Bonzini (address@hidden) wrote: +> +>>>> +> +>>>> +> +>>>> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +>>>>> Ouch that's pretty nasty; I remember Paolo explaining to me a while ago +> +>>>>> that +> +>>>>> their were times when run_on_cpu would have to drop the BQL and I +> +>>>>> worried about it, +> +>>>>> but this is the 1st time I've seen an error due to it. +> +>>>>> +> +>>>>> Do you know what the migration state was at that point? Was it +> +>>>>> MIGRATION_STATUS_CANCELLING? 
+> +>>>>> I'm thinking perhaps we should stop 'cont' from continuing while +> +>>>>> migration is in +> +>>>>> MIGRATION_STATUS_CANCELLING. Do we send an event when we hit CANCELLED +> +>>>>> - so that +> +>>>>> perhaps libvirt could avoid sending the 'cont' until then? +> +>>>> +> +>>>> No, there's no event, though I thought libvirt would poll until +> +>>>> "query-migrate" returns the cancelled state. Of course that is a small +> +>>>> consolation, because a segfault is unacceptable. +> +>>> +> +>>> I think you might get an event if you set the new migrate capability +> +>>> called +> +>>> 'events' on! +> +>>> +> +>>> void migrate_set_state(int *state, int old_state, int new_state) +> +>>> { +> +>>> if (atomic_cmpxchg(state, old_state, new_state) == old_state) { +> +>>> trace_migrate_set_state(new_state); +> +>>> migrate_generate_event(new_state); +> +>>> } +> +>>> } +> +>>> +> +>>> static void migrate_generate_event(int new_state) +> +>>> { +> +>>> if (migrate_use_events()) { +> +>>> qapi_event_send_migration(new_state, &error_abort); +> +>>> } +> +>>> } +> +>>> +> +>>> That event feature went in sometime after 2.3.0. +> +>>> +> +>>>> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +>>>> resume it (with add_migration_state_change_notifier) when we hit the +> +>>>> CANCELLED state. I'm not sure what the latency would be between the end +> +>>>> of migrate_fd_cancel and finally reaching CANCELLED. +> +>>> +> +>>> I don't like suspending monitors; it can potentially take quite a +> +>>> significant +> +>>> time to do a cancel. +> +>>> How about making 'cont' fail if we're in CANCELLING? +> +>> +> +>> Actually I thought that would be the case already (in fact CANCELLING is +> +>> internal only; the outside world sees it as "active" in query-migrate). +> +>> +> +>> Lei, what is the runstate? (That is, why did cont succeed at all)? 
+> +> +> +> I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the +> +> device +> +> save, and that's what we get at the end of a migrate and it's legal to +> +> restart +> +> from there. +> +> +Yeah, but I think we get there at the end of a failed migrate only. So +> +perhaps we can introduce a new state RUN_STATE_FAILED_MIGRATE and forbid +> +"cont" from finish-migrate (only allow it from failed-migrate)? +OK, I was wrong in my previous statement; we actually go +FINISH_MIGRATE->POSTMIGRATE +so no new state is needed; you shouldn't be restarting the cpu in +FINISH_MIGRATE. + +My preference is to get libvirt to wait for the transition to POSTMIGRATE before +it issues the 'cont'. I'd rather not block the monitor with 'cont' but I'm +not sure how we'd cleanly make cont fail without breaking existing libvirts +that usually don't hit this race. (cc'ing in Jiri). + +Dave + +> +Paolo +> +> +>> Paolo +> +>> +> +>>> I'd really love to see the 'run_on_cpu' being more careful about the BQL; +> +>>> we really need all of the rest of the devices to stay quiesced at times. +> +>> +> +>> That's not really possible, because of how condition variables work. :( +> +> +> +> *Really* we need to find a solution to that - there's probably lots of +> +> other things that can spring up in that small window other than the +> +> 'cont'. +> +> +> +> Dave +> +> +> +> -- +> +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +Hi Dave, + +On Fri, Mar 3, 2017 at 9:26 PM, Dr. David Alan Gilbert <address@hidden> +wrote: + +> +* Paolo Bonzini (address@hidden) wrote: +> +> +> +> +> +> On 03/03/2017 14:11, Dr. David Alan Gilbert wrote: +> +> > * Paolo Bonzini (address@hidden) wrote: +> +> >> +> +> >> +> +> >> On 03/03/2017 13:00, Dr. David Alan Gilbert wrote: +> +... +> +> > That event feature went in sometime after 2.3.0. 
+> +> > +> +> >> One possibility is to suspend the monitor in qmp_migrate_cancel and +> +> >> resume it (with add_migration_state_change_notifier) when we hit the +> +> >> CANCELLED state. I'm not sure what the latency would be between the +> +end +> +> >> of migrate_fd_cancel and finally reaching CANCELLED. +> +> > +> +> > I don't like suspending monitors; it can potentially take quite a +> +significant +> +> > time to do a cancel. +> +> > How about making 'cont' fail if we're in CANCELLING? +> +> +> +> Actually I thought that would be the case already (in fact CANCELLING is +> +> internal only; the outside world sees it as "active" in query-migrate). +> +> +> +> Lei, what is the runstate? (That is, why did cont succeed at all)? +> +> +I suspect it's RUN_STATE_FINISH_MIGRATE - we set that before we do the +> +device +> +It is RUN_STATE_FINISH_MIGRATE. + + +> +save, and that's what we get at the end of a migrate and it's legal to +> +restart +> +from there. +> +> +> Paolo +> +> +> +> > I'd really love to see the 'run_on_cpu' being more careful about the +> +BQL; +> +> > we really need all of the rest of the devices to stay quiesced at +> +times. +> +> +> +> That's not really possible, because of how condition variables work. :( +> +> +*Really* we need to find a solution to that - there's probably lots of +> +other things that can spring up in that small window other than the +> +'cont'. +> +This is what I was worry about. Not only sync_cpu_state() will call +run_on_cpu() +but also vm_stop_force_state() will, both of them did hit the small windows +in our +test. + + +> +> +Dave +> +> +-- +> +Dr. 
David Alan Gilbert / address@hidden / Manchester, UK +> +> + diff --git a/results/classifier/016/debug/21247035 b/results/classifier/016/debug/21247035 new file mode 100644 index 00000000..ba624398 --- /dev/null +++ b/results/classifier/016/debug/21247035 @@ -0,0 +1,1348 @@ +debug: 0.843 +operating system: 0.163 +hypervisor: 0.116 +files: 0.092 +PID: 0.063 +kernel: 0.062 +TCG: 0.061 +register: 0.035 +performance: 0.027 +user-level: 0.023 +assembly: 0.019 +virtual: 0.019 +device: 0.014 +architecture: 0.007 +semantic: 0.006 +risc-v: 0.004 +socket: 0.003 +peripherals: 0.003 +network: 0.003 +graphic: 0.002 +VMM: 0.002 +boot: 0.001 +permissions: 0.001 +alpha: 0.001 +vnc: 0.001 +KVM: 0.001 +mistranslation: 0.000 +ppc: 0.000 +x86: 0.000 +i386: 0.000 +arm: 0.000 + +[Qemu-devel] [BUG] I/O thread segfault for QEMU on s390x + +Hi, +I have been noticing some segfaults for QEMU on s390x, and I have been +hitting this issue quite reliably (at least once in 10 runs of a test +case). The qemu version is 2.11.50, and I have systemd created coredumps +when this happens. 
+ +Here is a back trace of the segfaulting thread: + + +#0 0x000003ffafed202c in swapcontext () from /lib64/libc.so.6 +#1 0x000002aa355c02ee in qemu_coroutine_new () at +util/coroutine-ucontext.c:164 +#2 0x000002aa355bec34 in qemu_coroutine_create +(address@hidden <blk_aio_read_entry>, +address@hidden) at util/qemu-coroutine.c:76 +#3 0x000002aa35510262 in blk_aio_prwv (blk=0x2aa65fbefa0, +offset=<optimized out>, bytes=<optimized out>, qiov=0x3ffa002a9c0, +address@hidden <blk_aio_read_entry>, flags=0, +cb=0x2aa35340a50 <virtio_blk_rw_complete>, opaque=0x3ffa002a960) at +block/block-backend.c:1299 +#4 0x000002aa35510376 in blk_aio_preadv (blk=<optimized out>, +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +cb=<optimized out>, opaque=0x3ffa002a960) at block/block-backend.c:1392 +#5 0x000002aa3534114e in submit_requests (niov=<optimized out>, +num_reqs=<optimized out>, start=<optimized out>, mrb=<optimized out>, +blk=<optimized out>) at +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:372 +#6 virtio_blk_submit_multireq (blk=<optimized out>, +address@hidden) at +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:402 +#7 0x000002aa353422e0 in virtio_blk_handle_vq (s=0x2aa6611e7d8, +vq=0x3ffb0f5f010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +#8 0x000002aa3536655a in virtio_queue_notify_aio_vq +(address@hidden) at +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +#9 0x000002aa35366cd6 in virtio_queue_notify_aio_vq (vq=0x3ffb0f5f010) +at /usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1511 +#10 virtio_queue_host_notifier_aio_poll (opaque=0x3ffb0f5f078) at +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:2409 +#11 0x000002aa355a8ba4 in run_poll_handlers_once +(address@hidden) at util/aio-posix.c:497 +#12 0x000002aa355a9b74 in run_poll_handlers (max_ns=<optimized out>, +ctx=0x2aa65f99310) at util/aio-posix.c:534 +#13 try_poll_mode (blocking=true, ctx=0x2aa65f99310) at util/aio-posix.c:562 +#14 aio_poll (ctx=0x2aa65f99310, 
address@hidden) at +util/aio-posix.c:602 +#15 0x000002aa353d2d0a in iothread_run (opaque=0x2aa65f990f0) at +iothread.c:60 +#16 0x000003ffb0f07e82 in start_thread () from /lib64/libpthread.so.0 +#17 0x000003ffaff91596 in thread_start () from /lib64/libc.so.6 +I don't have much knowledge about i/o threads and the block layer code +in QEMU, so I would like to report to the community about this issue. +I believe this very similar to the bug that I reported upstream couple +of days ago +( +https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg04452.html +). +Any help would be greatly appreciated. + +Thanks +Farhan + +On Thu, Mar 1, 2018 at 10:33 PM, Farhan Ali <address@hidden> wrote: +> +Hi, +> +> +I have been noticing some segfaults for QEMU on s390x, and I have been +> +hitting this issue quite reliably (at least once in 10 runs of a test case). +> +The qemu version is 2.11.50, and I have systemd created coredumps +> +when this happens. +Can you describe the test case or suggest how to reproduce it for us? + +Fam + +On 03/02/2018 01:13 AM, Fam Zheng wrote: +On Thu, Mar 1, 2018 at 10:33 PM, Farhan Ali <address@hidden> wrote: +Hi, + +I have been noticing some segfaults for QEMU on s390x, and I have been +hitting this issue quite reliably (at least once in 10 runs of a test case). +The qemu version is 2.11.50, and I have systemd created coredumps +when this happens. +Can you describe the test case or suggest how to reproduce it for us? + +Fam +The test case is with a single guest, running a memory intensive +workload. The guest has 8 vpcus and 4G of memory. 
+Here is the qemu command line, if that helps: + +/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ +-S -object +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes +\ +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ +-m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ +-object iothread,id=iothread1 -object iothread,id=iothread2 -uuid +b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ +-display none -no-user-config -nodefaults -chardev +socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc +-no-shutdown \ +-boot strict=on -drive +file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +-drive +file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 +-netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 +-chardev pty,id=charconsole0 -device +sclpconsole,chardev=charconsole0,id=console0 -device +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on +Please let me know if I need to provide any other information. + +Thanks +Farhan + +On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: +> +Hi, +> +> +I have been noticing some segfaults for QEMU on s390x, and I have been +> +hitting this issue quite reliably (at least once in 10 runs of a test case). +> +The qemu version is 2.11.50, and I have systemd created coredumps +> +when this happens. +> +> +Here is a back trace of the segfaulting thread: +The backtrace looks normal. 
+ +Please post the QEMU command-line and the details of the segfault (which +memory access faulted?). + +> +#0 0x000003ffafed202c in swapcontext () from /lib64/libc.so.6 +> +#1 0x000002aa355c02ee in qemu_coroutine_new () at +> +util/coroutine-ucontext.c:164 +> +#2 0x000002aa355bec34 in qemu_coroutine_create +> +(address@hidden <blk_aio_read_entry>, +> +address@hidden) at util/qemu-coroutine.c:76 +> +#3 0x000002aa35510262 in blk_aio_prwv (blk=0x2aa65fbefa0, offset=<optimized +> +out>, bytes=<optimized out>, qiov=0x3ffa002a9c0, +> +address@hidden <blk_aio_read_entry>, flags=0, +> +cb=0x2aa35340a50 <virtio_blk_rw_complete>, opaque=0x3ffa002a960) at +> +block/block-backend.c:1299 +> +#4 0x000002aa35510376 in blk_aio_preadv (blk=<optimized out>, +> +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +> +cb=<optimized out>, opaque=0x3ffa002a960) at block/block-backend.c:1392 +> +#5 0x000002aa3534114e in submit_requests (niov=<optimized out>, +> +num_reqs=<optimized out>, start=<optimized out>, mrb=<optimized out>, +> +blk=<optimized out>) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:372 +> +#6 virtio_blk_submit_multireq (blk=<optimized out>, +> +address@hidden) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:402 +> +#7 0x000002aa353422e0 in virtio_blk_handle_vq (s=0x2aa6611e7d8, +> +vq=0x3ffb0f5f010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +> +#8 0x000002aa3536655a in virtio_queue_notify_aio_vq +> +(address@hidden) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +> +#9 0x000002aa35366cd6 in virtio_queue_notify_aio_vq (vq=0x3ffb0f5f010) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1511 +> +#10 virtio_queue_host_notifier_aio_poll (opaque=0x3ffb0f5f078) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:2409 +> +#11 0x000002aa355a8ba4 in run_poll_handlers_once +> +(address@hidden) at util/aio-posix.c:497 +> +#12 0x000002aa355a9b74 in run_poll_handlers (max_ns=<optimized out>, +> 
+ctx=0x2aa65f99310) at util/aio-posix.c:534 +> +#13 try_poll_mode (blocking=true, ctx=0x2aa65f99310) at util/aio-posix.c:562 +> +#14 aio_poll (ctx=0x2aa65f99310, address@hidden) at +> +util/aio-posix.c:602 +> +#15 0x000002aa353d2d0a in iothread_run (opaque=0x2aa65f990f0) at +> +iothread.c:60 +> +#16 0x000003ffb0f07e82 in start_thread () from /lib64/libpthread.so.0 +> +#17 0x000003ffaff91596 in thread_start () from /lib64/libc.so.6 +> +> +> +I don't have much knowledge about i/o threads and the block layer code in +> +QEMU, so I would like to report to the community about this issue. +> +I believe this very similar to the bug that I reported upstream couple of +> +days ago +> +( +https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg04452.html +). +> +> +Any help would be greatly appreciated. +> +> +Thanks +> +Farhan +> +signature.asc +Description: +PGP signature + +On 03/02/2018 04:23 AM, Stefan Hajnoczi wrote: +On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: +Hi, + +I have been noticing some segfaults for QEMU on s390x, and I have been +hitting this issue quite reliably (at least once in 10 runs of a test case). +The qemu version is 2.11.50, and I have systemd created coredumps +when this happens. + +Here is a back trace of the segfaulting thread: +The backtrace looks normal. + +Please post the QEMU command-line and the details of the segfault (which +memory access faulted?). 
+I was able to create another crash today and here is the qemu comand line + +/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ +-S -object +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes +\ +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ +-m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ +-object iothread,id=iothread1 -object iothread,id=iothread2 -uuid +b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ +-display none -no-user-config -nodefaults -chardev +socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc +-no-shutdown \ +-boot strict=on -drive +file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +-drive +file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 +-netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 +-chardev pty,id=charconsole0 -device +sclpconsole,chardev=charconsole0,id=console0 -device +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on +This the latest back trace on the segfaulting thread, and it seems to +segfault in swapcontext. +Program terminated with signal SIGSEGV, Segmentation fault. 
+#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 + + +This is the remaining back trace: + +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 +#1 0x000002aa33b45566 in qemu_coroutine_new () at +util/coroutine-ucontext.c:164 +#2 0x000002aa33b43eac in qemu_coroutine_create +(address@hidden <blk_aio_write_entry>, +address@hidden) at util/qemu-coroutine.c:76 +#3 0x000002aa33a954da in blk_aio_prwv (blk=0x2aa4f0efda0, +offset=<optimized out>, bytes=<optimized out>, qiov=0x3ff74019080, +address@hidden <blk_aio_write_entry>, flags=0, +cb=0x2aa338c62e8 <virtio_blk_rw_complete>, opaque=0x3ff74019020) at +block/block-backend.c:1299 +#4 0x000002aa33a9563e in blk_aio_pwritev (blk=<optimized out>, +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +cb=<optimized out>, opaque=0x3ff74019020) at block/block-backend.c:1400 +#5 0x000002aa338c6a38 in submit_requests (niov=<optimized out>, +num_reqs=1, start=<optimized out>, mrb=0x3ff831fe6e0, blk=<optimized +out>) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:369 +#6 virtio_blk_submit_multireq (blk=<optimized out>, +address@hidden) at +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:426 +#7 0x000002aa338c7b78 in virtio_blk_handle_vq (s=0x2aa4f2507c8, +vq=0x3ff869df010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +#8 0x000002aa338ebdf2 in virtio_queue_notify_aio_vq (vq=0x3ff869df010) +at /usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +#9 0x000002aa33b2df46 in aio_dispatch_handlers +(address@hidden) at util/aio-posix.c:406 +#10 0x000002aa33b2eb50 in aio_poll (ctx=0x2aa4f0ca050, +address@hidden) at util/aio-posix.c:692 +#11 0x000002aa33957f6a in iothread_run (opaque=0x2aa4f0c9630) at +iothread.c:60 +#12 0x000003ff86987e82 in start_thread () from /lib64/libpthread.so.0 +#13 0x000003ff85a11596 in thread_start () from /lib64/libc.so.6 +Backtrace stopped: previous frame identical to this frame (corrupt stack?) 
+ +On Fri, Mar 02, 2018 at 10:30:57AM -0500, Farhan Ali wrote: +> +> +> +On 03/02/2018 04:23 AM, Stefan Hajnoczi wrote: +> +> On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: +> +> > Hi, +> +> > +> +> > I have been noticing some segfaults for QEMU on s390x, and I have been +> +> > hitting this issue quite reliably (at least once in 10 runs of a test +> +> > case). +> +> > The qemu version is 2.11.50, and I have systemd created coredumps +> +> > when this happens. +> +> > +> +> > Here is a back trace of the segfaulting thread: +> +> The backtrace looks normal. +> +> +> +> Please post the QEMU command-line and the details of the segfault (which +> +> memory access faulted?). +> +> +> +> +> +I was able to create another crash today and here is the qemu comand line +> +> +/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ +> +-S -object +> +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes +> +\ +> +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ +> +-m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ +> +-object iothread,id=iothread1 -object iothread,id=iothread2 -uuid +> +b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ +> +-display none -no-user-config -nodefaults -chardev +> +socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait +> +> +-mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown +> +\ +> +-boot strict=on -drive +> +file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +> +-device +> +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 +> +-drive +> +file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native +> +-device +> +virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 +> +-netdev 
tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device +> +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 +> +-chardev pty,id=charconsole0 -device +> +sclpconsole,chardev=charconsole0,id=console0 -device +> +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on +> +> +> +This the latest back trace on the segfaulting thread, and it seems to +> +segfault in swapcontext. +> +> +Program terminated with signal SIGSEGV, Segmentation fault. +> +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 +Please include the following gdb output: + + (gdb) disas swapcontext + (gdb) i r + +That way it's possible to see which instruction faulted and which +registers were being accessed. + +> +This is the remaining back trace: +> +> +#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 +> +#1 0x000002aa33b45566 in qemu_coroutine_new () at +> +util/coroutine-ucontext.c:164 +> +#2 0x000002aa33b43eac in qemu_coroutine_create +> +(address@hidden <blk_aio_write_entry>, +> +address@hidden) at util/qemu-coroutine.c:76 +> +#3 0x000002aa33a954da in blk_aio_prwv (blk=0x2aa4f0efda0, offset=<optimized +> +out>, bytes=<optimized out>, qiov=0x3ff74019080, +> +address@hidden <blk_aio_write_entry>, flags=0, +> +cb=0x2aa338c62e8 <virtio_blk_rw_complete>, opaque=0x3ff74019020) at +> +block/block-backend.c:1299 +> +#4 0x000002aa33a9563e in blk_aio_pwritev (blk=<optimized out>, +> +offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, +> +cb=<optimized out>, opaque=0x3ff74019020) at block/block-backend.c:1400 +> +#5 0x000002aa338c6a38 in submit_requests (niov=<optimized out>, num_reqs=1, +> +start=<optimized out>, mrb=0x3ff831fe6e0, blk=<optimized out>) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:369 +> +#6 virtio_blk_submit_multireq (blk=<optimized out>, +> +address@hidden) at +> +/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:426 +> +#7 0x000002aa338c7b78 in virtio_blk_handle_vq (s=0x2aa4f2507c8, +> +vq=0x3ff869df010) 
at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 +> +#8 0x000002aa338ebdf2 in virtio_queue_notify_aio_vq (vq=0x3ff869df010) at +> +/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 +> +#9 0x000002aa33b2df46 in aio_dispatch_handlers +> +(address@hidden) at util/aio-posix.c:406 +> +#10 0x000002aa33b2eb50 in aio_poll (ctx=0x2aa4f0ca050, +> +address@hidden) at util/aio-posix.c:692 +> +#11 0x000002aa33957f6a in iothread_run (opaque=0x2aa4f0c9630) at +> +iothread.c:60 +> +#12 0x000003ff86987e82 in start_thread () from /lib64/libpthread.so.0 +> +#13 0x000003ff85a11596 in thread_start () from /lib64/libc.so.6 +> +Backtrace stopped: previous frame identical to this frame (corrupt stack?) +> +signature.asc +Description: +PGP signature + +On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +Please include the following gdb output: + + (gdb) disas swapcontext + (gdb) i r + +That way it's possible to see which instruction faulted and which +registers were being accessed. +here is the disas out for swapcontext, this is on a coredump with +debugging symbols enabled for qemu. So the addresses from the previous +dump is a little different. 
+(gdb) disas swapcontext +Dump of assembler code for function swapcontext: + 0x000003ff90751fb8 <+0>: lgr %r1,%r2 + 0x000003ff90751fbc <+4>: lgr %r0,%r3 + 0x000003ff90751fc0 <+8>: stfpc 248(%r1) + 0x000003ff90751fc4 <+12>: std %f0,256(%r1) + 0x000003ff90751fc8 <+16>: std %f1,264(%r1) + 0x000003ff90751fcc <+20>: std %f2,272(%r1) + 0x000003ff90751fd0 <+24>: std %f3,280(%r1) + 0x000003ff90751fd4 <+28>: std %f4,288(%r1) + 0x000003ff90751fd8 <+32>: std %f5,296(%r1) + 0x000003ff90751fdc <+36>: std %f6,304(%r1) + 0x000003ff90751fe0 <+40>: std %f7,312(%r1) + 0x000003ff90751fe4 <+44>: std %f8,320(%r1) + 0x000003ff90751fe8 <+48>: std %f9,328(%r1) + 0x000003ff90751fec <+52>: std %f10,336(%r1) + 0x000003ff90751ff0 <+56>: std %f11,344(%r1) + 0x000003ff90751ff4 <+60>: std %f12,352(%r1) + 0x000003ff90751ff8 <+64>: std %f13,360(%r1) + 0x000003ff90751ffc <+68>: std %f14,368(%r1) + 0x000003ff90752000 <+72>: std %f15,376(%r1) + 0x000003ff90752004 <+76>: slgr %r2,%r2 + 0x000003ff90752008 <+80>: stam %a0,%a15,184(%r1) + 0x000003ff9075200c <+84>: stmg %r0,%r15,56(%r1) + 0x000003ff90752012 <+90>: la %r2,2 + 0x000003ff90752016 <+94>: lgr %r5,%r0 + 0x000003ff9075201a <+98>: la %r3,384(%r5) + 0x000003ff9075201e <+102>: la %r4,384(%r1) + 0x000003ff90752022 <+106>: lghi %r5,8 + 0x000003ff90752026 <+110>: svc 175 + 0x000003ff90752028 <+112>: lgr %r5,%r0 +=> 0x000003ff9075202c <+116>: lfpc 248(%r5) + 0x000003ff90752030 <+120>: ld %f0,256(%r5) + 0x000003ff90752034 <+124>: ld %f1,264(%r5) + 0x000003ff90752038 <+128>: ld %f2,272(%r5) + 0x000003ff9075203c <+132>: ld %f3,280(%r5) + 0x000003ff90752040 <+136>: ld %f4,288(%r5) + 0x000003ff90752044 <+140>: ld %f5,296(%r5) + 0x000003ff90752048 <+144>: ld %f6,304(%r5) + 0x000003ff9075204c <+148>: ld %f7,312(%r5) + 0x000003ff90752050 <+152>: ld %f8,320(%r5) + 0x000003ff90752054 <+156>: ld %f9,328(%r5) + 0x000003ff90752058 <+160>: ld %f10,336(%r5) + 0x000003ff9075205c <+164>: ld %f11,344(%r5) + 0x000003ff90752060 <+168>: ld %f12,352(%r5) + 
0x000003ff90752064 <+172>: ld %f13,360(%r5) + 0x000003ff90752068 <+176>: ld %f14,368(%r5) + 0x000003ff9075206c <+180>: ld %f15,376(%r5) + 0x000003ff90752070 <+184>: lam %a2,%a15,192(%r5) + 0x000003ff90752074 <+188>: lmg %r0,%r15,56(%r5) + 0x000003ff9075207a <+194>: br %r14 +End of assembler dump. + +(gdb) i r +r0 0x0 0 +r1 0x3ff8fe7de40 4396165881408 +r2 0x0 0 +r3 0x3ff8fe7e1c0 4396165882304 +r4 0x3ff8fe7dfc0 4396165881792 +r5 0x0 0 +r6 0xffffffff88004880 18446744071696304256 +r7 0x3ff880009e0 4396033247712 +r8 0x27ff89000 10736930816 +r9 0x3ff88001460 4396033250400 +r10 0x1000 4096 +r11 0x1261be0 19274720 +r12 0x3ff88001e00 4396033252864 +r13 0x14d0bc0 21826496 +r14 0x1312ac8 19999432 +r15 0x3ff8fe7dc80 4396165880960 +pc 0x3ff9075202c 0x3ff9075202c <swapcontext+116> +cc 0x2 2 + +On 03/05/2018 07:45 PM, Farhan Ali wrote: +> +> +> +On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +> +> Please include the following gdb output: +> +> +> +>   (gdb) disas swapcontext +> +>   (gdb) i r +> +> +> +> That way it's possible to see which instruction faulted and which +> +> registers were being accessed. +> +> +> +here is the disas out for swapcontext, this is on a coredump with debugging +> +symbols enabled for qemu. So the addresses from the previous dump is a little +> +different. 
+> +> +> +(gdb) disas swapcontext +> +Dump of assembler code for function swapcontext: +> +  0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 +> +  0x000003ff90751fbc <+4>:   lgr   %r0,%r3 +> +  0x000003ff90751fc0 <+8>:   stfpc   248(%r1) +> +  0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) +> +  0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) +> +  0x000003ff90751fcc <+20>:   std   %f2,272(%r1) +> +  0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) +> +  0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) +> +  0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) +> +  0x000003ff90751fdc <+36>:   std   %f6,304(%r1) +> +  0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) +> +  0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) +> +  0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) +> +  0x000003ff90751fec <+52>:   std   %f10,336(%r1) +> +  0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) +> +  0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) +> +  0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) +> +  0x000003ff90751ffc <+68>:   std   %f14,368(%r1) +> +  0x000003ff90752000 <+72>:   std   %f15,376(%r1) +> +  0x000003ff90752004 <+76>:   slgr   %r2,%r2 +> +  0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) +> +  0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) +> +  0x000003ff90752012 <+90>:   la   %r2,2 +> +  0x000003ff90752016 <+94>:   lgr   %r5,%r0 +> +  0x000003ff9075201a <+98>:   la   %r3,384(%r5) +> +  0x000003ff9075201e <+102>:   la   %r4,384(%r1) +> +  0x000003ff90752022 <+106>:   lghi   %r5,8 +> +  0x000003ff90752026 <+110>:   svc   175 +sys_rt_sigprocmask. r0 should not be changed by the system call. + +> +  0x000003ff90752028 <+112>:   lgr   %r5,%r0 +> +=> 0x000003ff9075202c <+116>:   lfpc   248(%r5) +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +2nd parameter to this +function). Now this is odd. 
+ +> +  0x000003ff90752030 <+120>:   ld   %f0,256(%r5) +> +  0x000003ff90752034 <+124>:   ld   %f1,264(%r5) +> +  0x000003ff90752038 <+128>:   ld   %f2,272(%r5) +> +  0x000003ff9075203c <+132>:   ld   %f3,280(%r5) +> +  0x000003ff90752040 <+136>:   ld   %f4,288(%r5) +> +  0x000003ff90752044 <+140>:   ld   %f5,296(%r5) +> +  0x000003ff90752048 <+144>:   ld   %f6,304(%r5) +> +  0x000003ff9075204c <+148>:   ld   %f7,312(%r5) +> +  0x000003ff90752050 <+152>:   ld   %f8,320(%r5) +> +  0x000003ff90752054 <+156>:   ld   %f9,328(%r5) +> +  0x000003ff90752058 <+160>:   ld   %f10,336(%r5) +> +  0x000003ff9075205c <+164>:   ld   %f11,344(%r5) +> +  0x000003ff90752060 <+168>:   ld   %f12,352(%r5) +> +  0x000003ff90752064 <+172>:   ld   %f13,360(%r5) +> +  0x000003ff90752068 <+176>:   ld   %f14,368(%r5) +> +  0x000003ff9075206c <+180>:   ld   %f15,376(%r5) +> +  0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) +> +  0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) +> +  0x000003ff9075207a <+194>:   br   %r14 +> +End of assembler dump. 
+> +> +(gdb) i r +> +r0            0x0   0 +> +r1            0x3ff8fe7de40   4396165881408 +> +r2            0x0   0 +> +r3            0x3ff8fe7e1c0   4396165882304 +> +r4            0x3ff8fe7dfc0   4396165881792 +> +r5            0x0   0 +> +r6            0xffffffff88004880   18446744071696304256 +> +r7            0x3ff880009e0   4396033247712 +> +r8            0x27ff89000   10736930816 +> +r9            0x3ff88001460   4396033250400 +> +r10           0x1000   4096 +> +r11           0x1261be0   19274720 +> +r12           0x3ff88001e00   4396033252864 +> +r13           0x14d0bc0   21826496 +> +r14           0x1312ac8   19999432 +> +r15           0x3ff8fe7dc80   4396165880960 +> +pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> +> +cc            0x2   2 + +On 5 March 2018 at 18:54, Christian Borntraeger <address@hidden> wrote: +> +> +> +On 03/05/2018 07:45 PM, Farhan Ali wrote: +> +> 0x000003ff90752026 <+110>: svc 175 +> +> +sys_rt_sigprocmask. r0 should not be changed by the system call. +> +> +> 0x000003ff90752028 <+112>: lgr %r5,%r0 +> +> => 0x000003ff9075202c <+116>: lfpc 248(%r5) +> +> +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +> +2nd parameter to this +> +function). Now this is odd. +...particularly given that the only place we call swapcontext() +the second parameter is always the address of a local variable +and can't be 0... 
+ +thanks +-- PMM + +Do you happen to run with a recent host kernel that has + +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 + s390: scrub registers on kernel entry and KVM exit + + + + + +Can you run with this on top +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 13a133a6015c..d6dc0e5e8f74 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -426,13 +426,13 @@ ENTRY(system_call) + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + stmg %r0,%r7,__PT_R0(%r11) +- # clear user controlled register to prevent speculative use +- xgr %r0,%r0 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) + .Lsysc_do_svc: ++ # clear user controlled register to prevent speculative use ++ xgr %r0,%r0 + # load address of system call table + lg %r10,__THREAD_sysc_table(%r13,%r12) + llgh %r8,__PT_INT_CODE+2(%r11) + + +To me it looks like that the critical section cleanup (interrupt during system +call entry) might +save the registers again into ptregs but we have already zeroed out r0. +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +critical +section cleanup. + +Adding Martin and Heiko. Will spin a patch. + + +On 03/05/2018 07:54 PM, Christian Borntraeger wrote: +> +> +> +On 03/05/2018 07:45 PM, Farhan Ali wrote: +> +> +> +> +> +> On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +> +>> Please include the following gdb output: +> +>> +> +>>   (gdb) disas swapcontext +> +>>   (gdb) i r +> +>> +> +>> That way it's possible to see which instruction faulted and which +> +>> registers were being accessed. +> +> +> +> +> +> here is the disas out for swapcontext, this is on a coredump with debugging +> +> symbols enabled for qemu. So the addresses from the previous dump is a +> +> little different. 
+> +> +> +> +> +> (gdb) disas swapcontext +> +> Dump of assembler code for function swapcontext: +> +>   0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 +> +>   0x000003ff90751fbc <+4>:   lgr   %r0,%r3 +> +>   0x000003ff90751fc0 <+8>:   stfpc   248(%r1) +> +>   0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) +> +>   0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) +> +>   0x000003ff90751fcc <+20>:   std   %f2,272(%r1) +> +>   0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) +> +>   0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) +> +>   0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) +> +>   0x000003ff90751fdc <+36>:   std   %f6,304(%r1) +> +>   0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) +> +>   0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) +> +>   0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) +> +>   0x000003ff90751fec <+52>:   std   %f10,336(%r1) +> +>   0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) +> +>   0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) +> +>   0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) +> +>   0x000003ff90751ffc <+68>:   std   %f14,368(%r1) +> +>   0x000003ff90752000 <+72>:   std   %f15,376(%r1) +> +>   0x000003ff90752004 <+76>:   slgr   %r2,%r2 +> +>   0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) +> +>   0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) +> +>   0x000003ff90752012 <+90>:   la   %r2,2 +> +>   0x000003ff90752016 <+94>:   lgr   %r5,%r0 +> +>   0x000003ff9075201a <+98>:   la   %r3,384(%r5) +> +>   0x000003ff9075201e <+102>:   la   %r4,384(%r1) +> +>   0x000003ff90752022 <+106>:   lghi   %r5,8 +> +>   0x000003ff90752026 <+110>:   svc   175 +> +> +sys_rt_sigprocmask. r0 should not be changed by the system call. +> +> +>   0x000003ff90752028 <+112>:   lgr   %r5,%r0 +> +> => 0x000003ff9075202c <+116>:   lfpc   248(%r5) +> +> +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +> +2nd parameter to this +> +function). Now this is odd. 
+> +> +>   0x000003ff90752030 <+120>:   ld   %f0,256(%r5) +> +>   0x000003ff90752034 <+124>:   ld   %f1,264(%r5) +> +>   0x000003ff90752038 <+128>:   ld   %f2,272(%r5) +> +>   0x000003ff9075203c <+132>:   ld   %f3,280(%r5) +> +>   0x000003ff90752040 <+136>:   ld   %f4,288(%r5) +> +>   0x000003ff90752044 <+140>:   ld   %f5,296(%r5) +> +>   0x000003ff90752048 <+144>:   ld   %f6,304(%r5) +> +>   0x000003ff9075204c <+148>:   ld   %f7,312(%r5) +> +>   0x000003ff90752050 <+152>:   ld   %f8,320(%r5) +> +>   0x000003ff90752054 <+156>:   ld   %f9,328(%r5) +> +>   0x000003ff90752058 <+160>:   ld   %f10,336(%r5) +> +>   0x000003ff9075205c <+164>:   ld   %f11,344(%r5) +> +>   0x000003ff90752060 <+168>:   ld   %f12,352(%r5) +> +>   0x000003ff90752064 <+172>:   ld   %f13,360(%r5) +> +>   0x000003ff90752068 <+176>:   ld   %f14,368(%r5) +> +>   0x000003ff9075206c <+180>:   ld   %f15,376(%r5) +> +>   0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) +> +>   0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) +> +>   0x000003ff9075207a <+194>:   br   %r14 +> +> End of assembler dump. 
+> +> +> +> (gdb) i r +> +> r0            0x0   0 +> +> r1            0x3ff8fe7de40   4396165881408 +> +> r2            0x0   0 +> +> r3            0x3ff8fe7e1c0   4396165882304 +> +> r4            0x3ff8fe7dfc0   4396165881792 +> +> r5            0x0   0 +> +> r6            0xffffffff88004880   18446744071696304256 +> +> r7            0x3ff880009e0   4396033247712 +> +> r8            0x27ff89000   10736930816 +> +> r9            0x3ff88001460   4396033250400 +> +> r10           0x1000   4096 +> +> r11           0x1261be0   19274720 +> +> r12           0x3ff88001e00   4396033252864 +> +> r13           0x14d0bc0   21826496 +> +> r14           0x1312ac8   19999432 +> +> r15           0x3ff8fe7dc80   4396165880960 +> +> pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> +> +> cc            0x2   2 + +On 03/05/2018 02:08 PM, Christian Borntraeger wrote: +Do you happen to run with a recent host kernel that has + +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 + s390: scrub registers on kernel entry and KVM exit +Yes. 
+Can you run with this on top +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 13a133a6015c..d6dc0e5e8f74 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -426,13 +426,13 @@ ENTRY(system_call) + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + stmg %r0,%r7,__PT_R0(%r11) +- # clear user controlled register to prevent speculative use +- xgr %r0,%r0 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) + .Lsysc_do_svc: ++ # clear user controlled register to prevent speculative use ++ xgr %r0,%r0 + # load address of system call table + lg %r10,__THREAD_sysc_table(%r13,%r12) + llgh %r8,__PT_INT_CODE+2(%r11) + + +To me it looks like that the critical section cleanup (interrupt during system +call entry) might +save the registers again into ptregs but we have already zeroed out r0. +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +critical +section cleanup. +Okay I will run with this. +Adding Martin and Heiko. Will spin a patch. + + +On 03/05/2018 07:54 PM, Christian Borntraeger wrote: +On 03/05/2018 07:45 PM, Farhan Ali wrote: +On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: +Please include the following gdb output: + +   (gdb) disas swapcontext +   (gdb) i r + +That way it's possible to see which instruction faulted and which +registers were being accessed. +here is the disas out for swapcontext, this is on a coredump with debugging +symbols enabled for qemu. So the addresses from the previous dump is a little +different. 
+ + +(gdb) disas swapcontext +Dump of assembler code for function swapcontext: +   0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 +   0x000003ff90751fbc <+4>:   lgr   %r0,%r3 +   0x000003ff90751fc0 <+8>:   stfpc   248(%r1) +   0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) +   0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) +   0x000003ff90751fcc <+20>:   std   %f2,272(%r1) +   0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) +   0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) +   0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) +   0x000003ff90751fdc <+36>:   std   %f6,304(%r1) +   0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) +   0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) +   0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) +   0x000003ff90751fec <+52>:   std   %f10,336(%r1) +   0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) +   0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) +   0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) +   0x000003ff90751ffc <+68>:   std   %f14,368(%r1) +   0x000003ff90752000 <+72>:   std   %f15,376(%r1) +   0x000003ff90752004 <+76>:   slgr   %r2,%r2 +   0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) +   0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) +   0x000003ff90752012 <+90>:   la   %r2,2 +   0x000003ff90752016 <+94>:   lgr   %r5,%r0 +   0x000003ff9075201a <+98>:   la   %r3,384(%r5) +   0x000003ff9075201e <+102>:   la   %r4,384(%r1) +   0x000003ff90752022 <+106>:   lghi   %r5,8 +   0x000003ff90752026 <+110>:   svc   175 +sys_rt_sigprocmask. r0 should not be changed by the system call. +  0x000003ff90752028 <+112>:   lgr   %r5,%r0 +=> 0x000003ff9075202c <+116>:   lfpc   248(%r5) +so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the +2nd parameter to this +function). Now this is odd. 
+  0x000003ff90752030 <+120>:   ld   %f0,256(%r5) +   0x000003ff90752034 <+124>:   ld   %f1,264(%r5) +   0x000003ff90752038 <+128>:   ld   %f2,272(%r5) +   0x000003ff9075203c <+132>:   ld   %f3,280(%r5) +   0x000003ff90752040 <+136>:   ld   %f4,288(%r5) +   0x000003ff90752044 <+140>:   ld   %f5,296(%r5) +   0x000003ff90752048 <+144>:   ld   %f6,304(%r5) +   0x000003ff9075204c <+148>:   ld   %f7,312(%r5) +   0x000003ff90752050 <+152>:   ld   %f8,320(%r5) +   0x000003ff90752054 <+156>:   ld   %f9,328(%r5) +   0x000003ff90752058 <+160>:   ld   %f10,336(%r5) +   0x000003ff9075205c <+164>:   ld   %f11,344(%r5) +   0x000003ff90752060 <+168>:   ld   %f12,352(%r5) +   0x000003ff90752064 <+172>:   ld   %f13,360(%r5) +   0x000003ff90752068 <+176>:   ld   %f14,368(%r5) +   0x000003ff9075206c <+180>:   ld   %f15,376(%r5) +   0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) +   0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) +   0x000003ff9075207a <+194>:   br   %r14 +End of assembler dump. + +(gdb) i r +r0            0x0   0 +r1            0x3ff8fe7de40   4396165881408 +r2            0x0   0 +r3            0x3ff8fe7e1c0   4396165882304 +r4            0x3ff8fe7dfc0   4396165881792 +r5            0x0   0 +r6            0xffffffff88004880   18446744071696304256 +r7            0x3ff880009e0   4396033247712 +r8            0x27ff89000   10736930816 +r9            0x3ff88001460   4396033250400 +r10           0x1000   4096 +r11           0x1261be0   19274720 +r12           0x3ff88001e00   4396033252864 +r13           0x14d0bc0   21826496 +r14           0x1312ac8   19999432 +r15           0x3ff8fe7dc80   4396165880960 +pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> +cc            0x2   2 + +On Mon, 5 Mar 2018 20:08:45 +0100 +Christian Borntraeger <address@hidden> wrote: + +> +Do you happen to run with a recent host kernel that has +> +> +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 +> +s390: scrub registers on kernel entry and KVM exit +> +> +Can you run with 
this on top +> +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +> +index 13a133a6015c..d6dc0e5e8f74 100644 +> +--- a/arch/s390/kernel/entry.S +> ++++ b/arch/s390/kernel/entry.S +> +@@ -426,13 +426,13 @@ ENTRY(system_call) +> +UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER +> +BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP +> +stmg %r0,%r7,__PT_R0(%r11) +> +- # clear user controlled register to prevent speculative use +> +- xgr %r0,%r0 +> +mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC +> +mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW +> +mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC +> +stg %r14,__PT_FLAGS(%r11) +> +.Lsysc_do_svc: +> ++ # clear user controlled register to prevent speculative use +> ++ xgr %r0,%r0 +> +# load address of system call table +> +lg %r10,__THREAD_sysc_table(%r13,%r12) +> +llgh %r8,__PT_INT_CODE+2(%r11) +> +> +> +To me it looks like that the critical section cleanup (interrupt during +> +system call entry) might +> +save the registers again into ptregs but we have already zeroed out r0. +> +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +> +critical +> +section cleanup. +> +> +Adding Martin and Heiko. Will spin a patch. +Argh, yes. Thanks Chrisitan, this is it. I have been searching for the bug +for days now. The point is that if the system call handler is interrupted +after the xgr but before .Lsysc_do_svc the code at .Lcleanup_system_call +repeats the stmg for %r0-%r7 but now %r0 is already zero. + +Please commit a patch for this and I'll will queue it up immediately. + +-- +blue skies, + Martin. + +"Reality continues to ruin my life." - Calvin. 
+ +On 03/06/2018 01:34 AM, Martin Schwidefsky wrote: +On Mon, 5 Mar 2018 20:08:45 +0100 +Christian Borntraeger <address@hidden> wrote: +Do you happen to run with a recent host kernel that has + +commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 + s390: scrub registers on kernel entry and KVM exit + +Can you run with this on top +diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S +index 13a133a6015c..d6dc0e5e8f74 100644 +--- a/arch/s390/kernel/entry.S ++++ b/arch/s390/kernel/entry.S +@@ -426,13 +426,13 @@ ENTRY(system_call) + UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER + BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP + stmg %r0,%r7,__PT_R0(%r11) +- # clear user controlled register to prevent speculative use +- xgr %r0,%r0 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW + mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC + stg %r14,__PT_FLAGS(%r11) + .Lsysc_do_svc: ++ # clear user controlled register to prevent speculative use ++ xgr %r0,%r0 + # load address of system call table + lg %r10,__THREAD_sysc_table(%r13,%r12) + llgh %r8,__PT_INT_CODE+2(%r11) + + +To me it looks like that the critical section cleanup (interrupt during system +call entry) might +save the registers again into ptregs but we have already zeroed out r0. +This patch moves the clearing of r0 after sysc_do_svc, which should fix the +critical +section cleanup. + +Adding Martin and Heiko. Will spin a patch. +Argh, yes. Thanks Chrisitan, this is it. I have been searching for the bug +for days now. The point is that if the system call handler is interrupted +after the xgr but before .Lsysc_do_svc the code at .Lcleanup_system_call +repeats the stmg for %r0-%r7 but now %r0 is already zero. + +Please commit a patch for this and I'll will queue it up immediately. +This patch does fix the QEMU crash. I haven't seen the crash after +running the test case for more than a day. 
Thanks to everyone for taking +a look at this problem :) +Thanks +Farhan + diff --git a/results/classifier/016/debug/22219210 b/results/classifier/016/debug/22219210 new file mode 100644 index 00000000..d1e1b0fb --- /dev/null +++ b/results/classifier/016/debug/22219210 @@ -0,0 +1,70 @@ +x86: 0.996 +debug: 0.965 +virtual: 0.932 +user-level: 0.812 +TCG: 0.627 +network: 0.538 +operating system: 0.235 +hypervisor: 0.090 +i386: 0.061 +PID: 0.051 +register: 0.037 +VMM: 0.034 +performance: 0.029 +architecture: 0.023 +device: 0.023 +socket: 0.021 +files: 0.019 +kernel: 0.017 +assembly: 0.009 +peripherals: 0.007 +risc-v: 0.006 +semantic: 0.005 +boot: 0.004 +graphic: 0.004 +alpha: 0.003 +KVM: 0.003 +permissions: 0.002 +ppc: 0.002 +vnc: 0.002 +mistranslation: 0.000 +arm: 0.000 + +[BUG][CPU hot-plug]CPU hot-plugs cause the qemu process to coredump + +Hello,Recently, when I was developing CPU hot-plugs under the loongarch +architecture, +I found that there was a problem with qemu cpu hot-plugs under x86 +architecture, +which caused the qemu process coredump when repeatedly inserting and +unplugging +the CPU when the TCG was accelerated. 
+ + +The specific operation process is as follows: + +1.Use the following command to start the virtual machine + +qemu-system-x86_64 \ +-machine q35 \ +-cpu Broadwell-IBRS \ +-smp 1,maxcpus=4,sockets=4,cores=1,threads=1 \ +-m 4G \ +-drive file=~/anolis-8.8.qcow2 \ +-serial stdio  \ +-monitor telnet:localhost:4498,server,nowait + + +2.Enter QEMU Monitor via telnet for repeated CPU insertion and unplugging + +telnet 127.0.0.1 4498 +(qemu) device_add +Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1 +(qemu) device_del cpu1 +(qemu) device_add +Broadwell-IBRS-x86_64-cpu,socket-id=1,core-id=0,thread-id=0,id=cpu1 +3.You will notice that the QEMU process has a coredump + +# malloc(): unsorted double linked list corrupted +Aborted (core dumped) + diff --git a/results/classifier/016/debug/23270873 b/results/classifier/016/debug/23270873 new file mode 100644 index 00000000..b11ef1d0 --- /dev/null +++ b/results/classifier/016/debug/23270873 @@ -0,0 +1,719 @@ +debug: 0.890 +operating system: 0.576 +hypervisor: 0.219 +virtual: 0.203 +boot: 0.131 +kernel: 0.070 +files: 0.068 +TCG: 0.042 +performance: 0.031 +PID: 0.019 +register: 0.018 +semantic: 0.016 +VMM: 0.014 +user-level: 0.012 +device: 0.006 +architecture: 0.005 +risc-v: 0.004 +assembly: 0.004 +network: 0.003 +socket: 0.003 +peripherals: 0.002 +KVM: 0.002 +vnc: 0.002 +alpha: 0.001 +graphic: 0.001 +permissions: 0.001 +mistranslation: 0.001 +x86: 0.000 +ppc: 0.000 +i386: 0.000 +arm: 0.000 + +[Qemu-devel] [BUG?] aio_get_linux_aio: Assertion `ctx->linux_aio' failed + +Hi, + +I am seeing some strange QEMU assertion failures for qemu on s390x, +which prevents a guest from starting. + +Git bisecting points to the following commit as the source of the error. 
+ +commit ed6e2161715c527330f936d44af4c547f25f687e +Author: Nishanth Aravamudan <address@hidden> +Date: Fri Jun 22 12:37:00 2018 -0700 + + linux-aio: properly bubble up errors from initialization + + laio_init() can fail for a couple of reasons, which will lead to a NULL + pointer dereference in laio_attach_aio_context(). + + To solve this, add a aio_setup_linux_aio() function which is called + early in raw_open_common. If this fails, propagate the error up. The + signature of aio_get_linux_aio() was not modified, because it seems + preferable to return the actual errno from the possible failing + initialization calls. + + Additionally, when the AioContext changes, we need to associate a + LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context + callback and call the new aio_setup_linux_aio(), which will allocate a +new AioContext if needed, and return errors on failures. If it +fails for +any reason, fallback to threaded AIO with an error message, as the + device is already in-use by the guest. + + Add an assert that aio_get_linux_aio() cannot return NULL. + + Signed-off-by: Nishanth Aravamudan <address@hidden> + Message-id: address@hidden + Signed-off-by: Stefan Hajnoczi <address@hidden> +Not sure what is causing this assertion to fail. 
Here is the qemu +command line of the guest, from qemu log, which throws this error: +LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin +QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name +guest=rt_vm1,debug-threads=on -S -object +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m +1024 -realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object +iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d +-display none -no-user-config -nodefaults -chardev +socket,id=charmonitor,fd=28,server,nowait -mon +chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown +-boot strict=on -drive +file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +-device +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on +-netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000 +-netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device +virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002 +-chardev pty,id=charconsole0 -device +sclpconsole,chardev=charconsole0,id=console0 -device +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox +on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny +-msg timestamp=on +2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges +2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev +pty,id=charconsole0: char device redirected to /dev/pts/3 (label +charconsole0) +qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion +`ctx->linux_aio' failed. +2018-07-17 15:48:43.309+0000: shutting down, reason=failed + + +Any help debugging this would be greatly appreciated. 
+ +Thank you +Farhan + +On 17.07.2018 [13:25:53 -0400], Farhan Ali wrote: +> +Hi, +> +> +I am seeing some strange QEMU assertion failures for qemu on s390x, +> +which prevents a guest from starting. +> +> +Git bisecting points to the following commit as the source of the error. +> +> +commit ed6e2161715c527330f936d44af4c547f25f687e +> +Author: Nishanth Aravamudan <address@hidden> +> +Date: Fri Jun 22 12:37:00 2018 -0700 +> +> +linux-aio: properly bubble up errors from initialization +> +> +laio_init() can fail for a couple of reasons, which will lead to a NULL +> +pointer dereference in laio_attach_aio_context(). +> +> +To solve this, add a aio_setup_linux_aio() function which is called +> +early in raw_open_common. If this fails, propagate the error up. The +> +signature of aio_get_linux_aio() was not modified, because it seems +> +preferable to return the actual errno from the possible failing +> +initialization calls. +> +> +Additionally, when the AioContext changes, we need to associate a +> +LinuxAioState with the new AioContext. Use the bdrv_attach_aio_context +> +callback and call the new aio_setup_linux_aio(), which will allocate a +> +new AioContext if needed, and return errors on failures. If it fails for +> +any reason, fallback to threaded AIO with an error message, as the +> +device is already in-use by the guest. +> +> +Add an assert that aio_get_linux_aio() cannot return NULL. +> +> +Signed-off-by: Nishanth Aravamudan <address@hidden> +> +Message-id: address@hidden +> +Signed-off-by: Stefan Hajnoczi <address@hidden> +> +> +> +Not sure what is causing this assertion to fail. 
Here is the qemu command +> +line of the guest, from qemu log, which throws this error: +> +> +> +LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin +> +QEMU_AUDIO_DRV=none /usr/local/bin/qemu-system-s390x -name +> +guest=rt_vm1,debug-threads=on -S -object +> +secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-21-rt_vm1/master-key.aes +> +-machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off -m 1024 +> +-realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -object +> +iothread,id=iothread1 -uuid 0cde16cd-091d-41bd-9ac2-5243df5c9a0d -display +> +none -no-user-config -nodefaults -chardev +> +socket,id=charmonitor,fd=28,server,nowait -mon +> +chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot +> +strict=on -drive +> +file=/dev/mapper/360050763998b0883980000002a000031,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native +> +-device +> +virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on +> +-netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device +> +virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:3a:c8:67:95:84,devno=fe.0.0000 +> +-netdev tap,fd=32,id=hostnet1,vhost=on,vhostfd=33 -device +> +virtio-net-ccw,netdev=hostnet1,id=net1,mac=52:54:00:2a:e5:08,devno=fe.0.0002 +> +-chardev pty,id=charconsole0 -device +> +sclpconsole,chardev=charconsole0,id=console0 -device +> +virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -sandbox +> +on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg +> +timestamp=on +> +> +> +> +2018-07-17 15:48:42.252+0000: Domain id=21 is tainted: high-privileges +> +2018-07-17T15:48:42.279380Z qemu-system-s390x: -chardev pty,id=charconsole0: +> +char device redirected to /dev/pts/3 (label charconsole0) +> +qemu-system-s390x: util/async.c:339: aio_get_linux_aio: Assertion +> +`ctx->linux_aio' failed. 
+> +2018-07-17 15:48:43.309+0000: shutting down, reason=failed +> +> +> +Any help debugging this would be greatly appreciated. +iiuc, this possibly implies AIO was not actually used previously on this +guest (it might have silently been falling back to threaded IO?). I +don't have access to s390x, but would it be possible to run qemu under +gdb and see if aio_setup_linux_aio is being called at all (I think it +might not be, but I'm not sure why), and if so, if it's for the context +in question? + +If it's not being called first, could you see what callpath is calling +aio_get_linux_aio when this assertion trips? + +Thanks! +-Nish + +On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +iiuc, this possibly implies AIO was not actually used previously on this +guest (it might have silently been falling back to threaded IO?). I +don't have access to s390x, but would it be possible to run qemu under +gdb and see if aio_setup_linux_aio is being called at all (I think it +might not be, but I'm not sure why), and if so, if it's for the context +in question? + +If it's not being called first, could you see what callpath is calling +aio_get_linux_aio when this assertion trips? + +Thanks! 
+-Nish +Hi Nishant, +From the coredump of the guest this is the call trace that calls +aio_get_linux_aio: +Stack trace of thread 145158: +#0 0x000003ff94dbe274 raise (libc.so.6) +#1 0x000003ff94da39a8 abort (libc.so.6) +#2 0x000003ff94db62ce __assert_fail_base (libc.so.6) +#3 0x000003ff94db634c __assert_fail (libc.so.6) +#4 0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +#5 0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +#6 0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +#7 0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +#8 0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +#9 0x000002aa20db3c34 aio_poll (qemu-system-s390x) +#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +#11 0x000003ff94f879a8 start_thread (libpthread.so.0) +#12 0x000003ff94e797ee thread_start (libc.so.6) + + +Thanks for taking a look and responding. + +Thanks +Farhan + +On 07/18/2018 09:42 AM, Farhan Ali wrote: +On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +iiuc, this possibly implies AIO was not actually used previously on this +guest (it might have silently been falling back to threaded IO?). I +don't have access to s390x, but would it be possible to run qemu under +gdb and see if aio_setup_linux_aio is being called at all (I think it +might not be, but I'm not sure why), and if so, if it's for the context +in question? + +If it's not being called first, could you see what callpath is calling +aio_get_linux_aio when this assertion trips? + +Thanks! 
+-Nish +Hi Nishant, +From the coredump of the guest this is the call trace that calls +aio_get_linux_aio: +Stack trace of thread 145158: +#0 0x000003ff94dbe274 raise (libc.so.6) +#1 0x000003ff94da39a8 abort (libc.so.6) +#2 0x000003ff94db62ce __assert_fail_base (libc.so.6) +#3 0x000003ff94db634c __assert_fail (libc.so.6) +#4 0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +#5 0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +#6 0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +#7 0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +#8 0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +#9 0x000002aa20db3c34 aio_poll (qemu-system-s390x) +#10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +#11 0x000003ff94f879a8 start_thread (libpthread.so.0) +#12 0x000003ff94e797ee thread_start (libc.so.6) + + +Thanks for taking a look and responding. + +Thanks +Farhan +Trying to debug a little further, the block device in this case is a +"host device". And looking at your commit carefully you use the +bdrv_attach_aio_context callback to setup a Linux AioContext. +For some reason the "host device" struct (BlockDriver bdrv_host_device +in block/file-posix.c) does not have a bdrv_attach_aio_context defined. +So a simple change of adding the callback to the struct solves the issue +and the guest starts fine. +diff --git a/block/file-posix.c b/block/file-posix.c +index 28824aa..b8d59fb 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { + .bdrv_refresh_limits = raw_refresh_limits, + .bdrv_io_plug = raw_aio_plug, + .bdrv_io_unplug = raw_aio_unplug, ++ .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, +I am not too familiar with block device code in QEMU, so not sure if +this is the right fix or if there are some underlying problems. 
+Thanks +Farhan + +On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: +> +> +> +On 07/18/2018 09:42 AM, Farhan Ali wrote: +> +> +> +> +> +> On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +> +> > iiuc, this possibly implies AIO was not actually used previously on this +> +> > guest (it might have silently been falling back to threaded IO?). I +> +> > don't have access to s390x, but would it be possible to run qemu under +> +> > gdb and see if aio_setup_linux_aio is being called at all (I think it +> +> > might not be, but I'm not sure why), and if so, if it's for the context +> +> > in question? +> +> > +> +> > If it's not being called first, could you see what callpath is calling +> +> > aio_get_linux_aio when this assertion trips? +> +> > +> +> > Thanks! +> +> > -Nish +> +> +> +> +> +> Hi Nishant, +> +> +> +> From the coredump of the guest this is the call trace that calls +> +> aio_get_linux_aio: +> +> +> +> +> +> Stack trace of thread 145158: +> +> #0 0x000003ff94dbe274 raise (libc.so.6) +> +> #1 0x000003ff94da39a8 abort (libc.so.6) +> +> #2 0x000003ff94db62ce __assert_fail_base (libc.so.6) +> +> #3 0x000003ff94db634c __assert_fail (libc.so.6) +> +> #4 0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +> +> #5 0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +> +> #6 0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +> +> #7 0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +> +> #8 0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +> +> #9 0x000002aa20db3c34 aio_poll (qemu-system-s390x) +> +> #10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +> +> #11 0x000003ff94f879a8 start_thread (libpthread.so.0) +> +> #12 0x000003ff94e797ee thread_start (libc.so.6) +> +> +> +> +> +> Thanks for taking a look and responding. +> +> +> +> Thanks +> +> Farhan +> +> +> +> +> +> +> +> +Trying to debug a little further, the block device in this case is a "host +> +device". 
And looking at your commit carefully you use the +> +bdrv_attach_aio_context callback to setup a Linux AioContext. +> +> +For some reason the "host device" struct (BlockDriver bdrv_host_device in +> +block/file-posix.c) does not have a bdrv_attach_aio_context defined. +> +So a simple change of adding the callback to the struct solves the issue and +> +the guest starts fine. +> +> +> +diff --git a/block/file-posix.c b/block/file-posix.c +> +index 28824aa..b8d59fb 100644 +> +--- a/block/file-posix.c +> ++++ b/block/file-posix.c +> +@@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { +> +.bdrv_refresh_limits = raw_refresh_limits, +> +.bdrv_io_plug = raw_aio_plug, +> +.bdrv_io_unplug = raw_aio_unplug, +> ++ .bdrv_attach_aio_context = raw_aio_attach_aio_context, +> +> +.bdrv_co_truncate = raw_co_truncate, +> +.bdrv_getlength = raw_getlength, +> +> +> +> +I am not too familiar with block device code in QEMU, so not sure if +> +this is the right fix or if there are some underlying problems. +Oh this is quite embarassing! I only added the bdrv_attach_aio_context +callback for the file-backed device. Your fix is definitely corect for +host device. Let me make sure there weren't any others missed and I will +send out a properly formatted patch. Thank you for the quick testing and +turnaround! + +-Nish + +On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote: +> +On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: +> +> +> +> +> +> On 07/18/2018 09:42 AM, Farhan Ali wrote: +> +>> +> +>> +> +>> On 07/17/2018 04:52 PM, Nishanth Aravamudan wrote: +> +>>> iiuc, this possibly implies AIO was not actually used previously on this +> +>>> guest (it might have silently been falling back to threaded IO?). I +> +>>> don't have access to s390x, but would it be possible to run qemu under +> +>>> gdb and see if aio_setup_linux_aio is being called at all (I think it +> +>>> might not be, but I'm not sure why), and if so, if it's for the context +> +>>> in question? 
+> +>>> +> +>>> If it's not being called first, could you see what callpath is calling +> +>>> aio_get_linux_aio when this assertion trips? +> +>>> +> +>>> Thanks! +> +>>> -Nish +> +>> +> +>> +> +>> Hi Nishant, +> +>> +> +>> From the coredump of the guest this is the call trace that calls +> +>> aio_get_linux_aio: +> +>> +> +>> +> +>> Stack trace of thread 145158: +> +>> #0 0x000003ff94dbe274 raise (libc.so.6) +> +>> #1 0x000003ff94da39a8 abort (libc.so.6) +> +>> #2 0x000003ff94db62ce __assert_fail_base (libc.so.6) +> +>> #3 0x000003ff94db634c __assert_fail (libc.so.6) +> +>> #4 0x000002aa20db067a aio_get_linux_aio (qemu-system-s390x) +> +>> #5 0x000002aa20d229a8 raw_aio_plug (qemu-system-s390x) +> +>> #6 0x000002aa20d309ee bdrv_io_plug (qemu-system-s390x) +> +>> #7 0x000002aa20b5a8ea virtio_blk_handle_vq (qemu-system-s390x) +> +>> #8 0x000002aa20db2f6e aio_dispatch_handlers (qemu-system-s390x) +> +>> #9 0x000002aa20db3c34 aio_poll (qemu-system-s390x) +> +>> #10 0x000002aa20be32a2 iothread_run (qemu-system-s390x) +> +>> #11 0x000003ff94f879a8 start_thread (libpthread.so.0) +> +>> #12 0x000003ff94e797ee thread_start (libc.so.6) +> +>> +> +>> +> +>> Thanks for taking a look and responding. +> +>> +> +>> Thanks +> +>> Farhan +> +>> +> +>> +> +>> +> +> +> +> Trying to debug a little further, the block device in this case is a "host +> +> device". And looking at your commit carefully you use the +> +> bdrv_attach_aio_context callback to setup a Linux AioContext. +> +> +> +> For some reason the "host device" struct (BlockDriver bdrv_host_device in +> +> block/file-posix.c) does not have a bdrv_attach_aio_context defined. +> +> So a simple change of adding the callback to the struct solves the issue and +> +> the guest starts fine. 
+> +> +> +> +> +> diff --git a/block/file-posix.c b/block/file-posix.c +> +> index 28824aa..b8d59fb 100644 +> +> --- a/block/file-posix.c +> +> +++ b/block/file-posix.c +> +> @@ -3135,6 +3135,7 @@ static BlockDriver bdrv_host_device = { +> +> .bdrv_refresh_limits = raw_refresh_limits, +> +> .bdrv_io_plug = raw_aio_plug, +> +> .bdrv_io_unplug = raw_aio_unplug, +> +> + .bdrv_attach_aio_context = raw_aio_attach_aio_context, +> +> +> +> .bdrv_co_truncate = raw_co_truncate, +> +> .bdrv_getlength = raw_getlength, +> +> +> +> +> +> +> +> I am not too familiar with block device code in QEMU, so not sure if +> +> this is the right fix or if there are some underlying problems. +> +> +Oh this is quite embarassing! I only added the bdrv_attach_aio_context +> +callback for the file-backed device. Your fix is definitely corect for +> +host device. Let me make sure there weren't any others missed and I will +> +send out a properly formatted patch. Thank you for the quick testing and +> +turnaround! +Farhan, can you respin your patch with proper sign-off and patch description? +Adding qemu-block. + +Hi Christian, + +On 19.07.2018 [08:55:20 +0200], Christian Borntraeger wrote: +> +> +> +On 07/18/2018 08:52 PM, Nishanth Aravamudan wrote: +> +> On 18.07.2018 [11:10:27 -0400], Farhan Ali wrote: +> +>> +> +>> +> +>> On 07/18/2018 09:42 AM, Farhan Ali wrote: +<snip> + +> +>> I am not too familiar with block device code in QEMU, so not sure if +> +>> this is the right fix or if there are some underlying problems. +> +> +> +> Oh this is quite embarassing! I only added the bdrv_attach_aio_context +> +> callback for the file-backed device. Your fix is definitely corect for +> +> host device. Let me make sure there weren't any others missed and I will +> +> send out a properly formatted patch. Thank you for the quick testing and +> +> turnaround! +> +> +Farhan, can you respin your patch with proper sign-off and patch description? +> +Adding qemu-block. 
+I sent it yesterday, sorry I didn't cc everyone from this e-mail: +http://lists.nongnu.org/archive/html/qemu-block/2018-07/msg00516.html +Thanks, +Nish + diff --git a/results/classifier/016/debug/30680944 b/results/classifier/016/debug/30680944 new file mode 100644 index 00000000..5b3e6f1c --- /dev/null +++ b/results/classifier/016/debug/30680944 @@ -0,0 +1,622 @@ +debug: 0.994 +kernel: 0.991 +operating system: 0.120 +TCG: 0.064 +files: 0.057 +assembly: 0.047 +hypervisor: 0.036 +VMM: 0.031 +architecture: 0.029 +user-level: 0.024 +PID: 0.020 +virtual: 0.020 +arm: 0.015 +device: 0.013 +register: 0.011 +network: 0.007 +performance: 0.007 +socket: 0.005 +semantic: 0.004 +vnc: 0.002 +graphic: 0.002 +risc-v: 0.002 +permissions: 0.002 +x86: 0.002 +alpha: 0.001 +boot: 0.001 +KVM: 0.001 +mistranslation: 0.001 +ppc: 0.001 +peripherals: 0.000 +i386: 0.000 + +[BUG]QEMU jump into interrupt when single-stepping on aarch64 + +Dear, folks, + +I try to debug Linux kernel with QEMU in single-stepping mode on aarch64 +platform, +the added breakpoint hits but after I type `step`, the gdb always jumps into +interrupt. + +My env: + + gdb-10.2 + qemu-6.2.0 + host kernel: 5.10.84 + VM kernel: 5.10.84 + +The steps to reproduce: + # host console: run a VM with only one core, the import arg: <qemu:arg +value='-s'/> + # details can be found here: +https://www.redhat.com/en/blog/debugging-kernel-qemulibvirt +virsh create dev_core0.xml + + # run gdb client + gdb ./vmlinux + + # gdb client on host console + (gdb) dir +./usr/src/debug/kernel-5.10.84/linux-5.10.84-004.alpha.ali5000.alios7.aarch64 + (gdb) target remote localhost:1234 + (gdb) info b + Num Type Disp Enb Address What + 1 breakpoint keep y <MULTIPLE> + 1.1 y 0xffff800010361444 +mm/memory-failure.c:1318 + 1.2 y 0xffff800010361450 in memory_failure + at mm/memory-failure.c:1488 + (gdb) c + Continuing. 
+ + # console in VM, use madvise to inject a hwposion at virtual address +vaddr, + # which will hit the b inmemory_failur: madvise(vaddr, pagesize, +MADV_HWPOISON); + # and the VM pause + ./run_madvise.c + + # gdb client on host console + (gdb) + Continuing. + Breakpoint 1, 0xffff800010361444 in memory_failure () at +mm/memory-failure.c:1318 + 1318 res = -EHWPOISON; + (gdb) n + vectors () at arch/arm64/kernel/entry.S:552 + 552 kernel_ventry 1, irq // IRQ +EL1h + (gdb) n + (gdb) n + (gdb) n + (gdb) n + gic_handle_irq (regs=0xffff8000147c3b80) at +drivers/irqchip/irq-gic-v3.c:721 + # after several step, I got the irqnr + (gdb) p irqnr + $5 = 8262 + +Sometimes, the irqnr is 27ï¼ which is used for arch_timer. + +I was wondering do you have any comments on this? And feedback are welcomed. + +Thank you. + +Best Regards. +Shuai + +On 4/6/22 09:30, Shuai Xue wrote: +Dear, folks, + +I try to debug Linux kernel with QEMU in single-stepping mode on aarch64 +platform, +the added breakpoint hits but after I type `step`, the gdb always jumps into +interrupt. + +My env: + + gdb-10.2 + qemu-6.2.0 + host kernel: 5.10.84 + VM kernel: 5.10.84 + +The steps to reproduce: + # host console: run a VM with only one core, the import arg: <qemu:arg +value='-s'/> + # details can be found here: +https://www.redhat.com/en/blog/debugging-kernel-qemulibvirt +virsh create dev_core0.xml + + # run gdb client + gdb ./vmlinux + + # gdb client on host console + (gdb) dir +./usr/src/debug/kernel-5.10.84/linux-5.10.84-004.alpha.ali5000.alios7.aarch64 + (gdb) target remote localhost:1234 + (gdb) info b + Num Type Disp Enb Address What + 1 breakpoint keep y <MULTIPLE> + 1.1 y 0xffff800010361444 +mm/memory-failure.c:1318 + 1.2 y 0xffff800010361450 in memory_failure + at mm/memory-failure.c:1488 + (gdb) c + Continuing. 
+ + # console in VM, use madvise to inject a hwposion at virtual address +vaddr, + # which will hit the b inmemory_failur: madvise(vaddr, pagesize, +MADV_HWPOISON); + # and the VM pause + ./run_madvise.c + + # gdb client on host console + (gdb) + Continuing. + Breakpoint 1, 0xffff800010361444 in memory_failure () at +mm/memory-failure.c:1318 + 1318 res = -EHWPOISON; + (gdb) n + vectors () at arch/arm64/kernel/entry.S:552 + 552 kernel_ventry 1, irq // IRQ +EL1h +The 'n' command is not a single-step: use stepi, which will suppress interrupts. +Anyway, not a bug. + +r~ + +å¨ 2022/4/7 AM12:57, Richard Henderson åé: +> +On 4/6/22 09:30, Shuai Xue wrote: +> +> Dear, folks, +> +> +> +> I try to debug Linux kernel with QEMU in single-stepping mode on aarch64 +> +> platform, +> +> the added breakpoint hits but after I type `step`, the gdb always jumps into +> +> interrupt. +> +> +> +> My env: +> +> +> +>     gdb-10.2 +> +>     qemu-6.2.0 +> +>     host kernel: 5.10.84 +> +>     VM kernel: 5.10.84 +> +> +> +> The steps to reproduce: +> +>     # host console: run a VM with only one core, the import arg: <qemu:arg +> +> value='-s'/> +> +>     # details can be found here: +> +> +https://www.redhat.com/en/blog/debugging-kernel-qemulibvirt +> +>     virsh create dev_core0.xml +> +>     +> +>     # run gdb client +> +>     gdb ./vmlinux +> +> +> +>     # gdb client on host console +> +>     (gdb) dir +> +> ./usr/src/debug/kernel-5.10.84/linux-5.10.84-004.alpha.ali5000.alios7.aarch64 +> +>     (gdb) target remote localhost:1234 +> +>     (gdb) info b +> +>     Num    Type          Disp Enb Address           What +> +>     1      breakpoint    keep y  <MULTIPLE> +> +>     1.1                        y  0xffff800010361444 +> +> mm/memory-failure.c:1318 +> +>     1.2                        y  0xffff800010361450 in memory_failure +> +>                                                    at +> +> mm/memory-failure.c:1488 +> +>     (gdb) c +> +>     Continuing. 
+> +> +> +>     # console in VM, use madvise to inject a hwposion at virtual address +> +> vaddr, +> +>     # which will hit the b inmemory_failur: madvise(vaddr, pagesize, +> +> MADV_HWPOISON); +> +>     # and the VM pause +> +>     ./run_madvise.c +> +> +> +>     # gdb client on host console +> +>     (gdb) +> +>     Continuing. +> +>     Breakpoint 1, 0xffff800010361444 in memory_failure () at +> +> mm/memory-failure.c:1318 +> +>     1318                   res = -EHWPOISON; +> +>     (gdb) n +> +>     vectors () at arch/arm64/kernel/entry.S:552 +> +>     552            kernel_ventry  1, irq                         // IRQ +> +> EL1h +> +> +The 'n' command is not a single-step: use stepi, which will suppress +> +interrupts. +> +Anyway, not a bug. +> +> +r~ +Hi, Richard, + +Thank you for your quick reply, I also try `stepi`, but it does NOT work either. + + (gdb) c + Continuing. + + Breakpoint 1, memory_failure (pfn=1273982, flags=1) at +mm/memory-failure.c:1488 + 1488 { + (gdb) stepi + vectors () at arch/arm64/kernel/entry.S:552 + 552 kernel_ventry 1, irq // IRQ +EL1h + +According to QEMU doc[1]: the default single stepping behavior is step with the +IRQs +and timer service routines off. I checked the MASK bits used to control the +single +stepping IE on my machine as bellow: + + # gdb client on host (x86 plafrom) + (gdb) maintenance packet qqemu.sstepbits + sending: "qqemu.sstepbits" + received: "ENABLE=1,NOIRQ=2,NOTIMER=4" + +The sstep MASK looks as expected, but does not work as expected. + +I also try the same kernel and qemu version on X86 platform: +> +> gdb-10.2 +> +> qemu-6.2.0 +> +> host kernel: 5.10.84 +> +> VM kernel: 5.10.84 +The command `n` jumps to the next instruction. 
+ + # gdb client on host (x86 plafrom) + (gdb) b memory-failure.c:1488 + Breakpoint 1, memory_failure (pfn=1128931, flags=1) at +mm/memory-failure.c:1488 + 1488 { + (gdb) n + 1497 if (!sysctl_memory_failure_recovery) + (gdb) stepi + 0xffffffff812efdbc 1497 if +(!sysctl_memory_failure_recovery) + (gdb) stepi + 0xffffffff812efdbe 1497 if +(!sysctl_memory_failure_recovery) + (gdb) n + 1500 p = pfn_to_online_page(pfn); + (gdb) l + 1496 + 1497 if (!sysctl_memory_failure_recovery) + 1498 panic("Memory failure on page %lx", pfn); + 1499 + 1500 p = pfn_to_online_page(pfn); + 1501 if (!p) { + +Best Regrades, +Shuai + + +[1] +https://github.com/qemu/qemu/blob/master/docs/system/gdb.rst + +å¨ 2022/4/7 PM12:10, Shuai Xue åé: +> +å¨ 2022/4/7 AM12:57, Richard Henderson åé: +> +> On 4/6/22 09:30, Shuai Xue wrote: +> +>> Dear, folks, +> +>> +> +>> I try to debug Linux kernel with QEMU in single-stepping mode on aarch64 +> +>> platform, +> +>> the added breakpoint hits but after I type `step`, the gdb always jumps +> +>> into interrupt. 
+> +>> +> +>> My env: +> +>> +> +>>     gdb-10.2 +> +>>     qemu-6.2.0 +> +>>     host kernel: 5.10.84 +> +>>     VM kernel: 5.10.84 +> +>> +> +>> The steps to reproduce: +> +>>     # host console: run a VM with only one core, the import arg: <qemu:arg +> +>> value='-s'/> +> +>>     # details can be found here: +> +>> +https://www.redhat.com/en/blog/debugging-kernel-qemulibvirt +> +>>     virsh create dev_core0.xml +> +>>     +> +>>     # run gdb client +> +>>     gdb ./vmlinux +> +>> +> +>>     # gdb client on host console +> +>>     (gdb) dir +> +>> ./usr/src/debug/kernel-5.10.84/linux-5.10.84-004.alpha.ali5000.alios7.aarch64 +> +>>     (gdb) target remote localhost:1234 +> +>>     (gdb) info b +> +>>     Num    Type          Disp Enb Address           What +> +>>     1      breakpoint    keep y  <MULTIPLE> +> +>>     1.1                        y  0xffff800010361444 +> +>> mm/memory-failure.c:1318 +> +>>     1.2                        y  0xffff800010361450 in memory_failure +> +>>                                                    at +> +>> mm/memory-failure.c:1488 +> +>>     (gdb) c +> +>>     Continuing. +> +>> +> +>>     # console in VM, use madvise to inject a hwposion at virtual address +> +>> vaddr, +> +>>     # which will hit the b inmemory_failur: madvise(vaddr, pagesize, +> +>> MADV_HWPOISON); +> +>>     # and the VM pause +> +>>     ./run_madvise.c +> +>> +> +>>     # gdb client on host console +> +>>     (gdb) +> +>>     Continuing. +> +>>     Breakpoint 1, 0xffff800010361444 in memory_failure () at +> +>> mm/memory-failure.c:1318 +> +>>     1318                   res = -EHWPOISON; +> +>>     (gdb) n +> +>>     vectors () at arch/arm64/kernel/entry.S:552 +> +>>     552            kernel_ventry  1, irq                         // IRQ +> +>> EL1h +> +> +> +> The 'n' command is not a single-step: use stepi, which will suppress +> +> interrupts. +> +> Anyway, not a bug. 
+> +> +> +> r~ +> +> +Hi, Richard, +> +> +Thank you for your quick reply, I also try `stepi`, but it does NOT work +> +either. +> +> +(gdb) c +> +Continuing. +> +> +Breakpoint 1, memory_failure (pfn=1273982, flags=1) at +> +mm/memory-failure.c:1488 +> +1488 { +> +(gdb) stepi +> +vectors () at arch/arm64/kernel/entry.S:552 +> +552 kernel_ventry 1, irq // IRQ +> +EL1h +> +> +According to QEMU doc[1]: the default single stepping behavior is step with +> +the IRQs +> +and timer service routines off. I checked the MASK bits used to control the +> +single +> +stepping IE on my machine as bellow: +> +> +# gdb client on host (x86 plafrom) +> +(gdb) maintenance packet qqemu.sstepbits +> +sending: "qqemu.sstepbits" +> +received: "ENABLE=1,NOIRQ=2,NOTIMER=4" +> +> +The sstep MASK looks as expected, but does not work as expected. +> +> +I also try the same kernel and qemu version on X86 platform: +> +>> gdb-10.2 +> +>> qemu-6.2.0 +> +>> host kernel: 5.10.84 +> +>> VM kernel: 5.10.84 +> +> +> +The command `n` jumps to the next instruction. +> +> +# gdb client on host (x86 plafrom) +> +(gdb) b memory-failure.c:1488 +> +Breakpoint 1, memory_failure (pfn=1128931, flags=1) at +> +mm/memory-failure.c:1488 +> +1488 { +> +(gdb) n +> +1497 if (!sysctl_memory_failure_recovery) +> +(gdb) stepi +> +0xffffffff812efdbc 1497 if +> +(!sysctl_memory_failure_recovery) +> +(gdb) stepi +> +0xffffffff812efdbe 1497 if +> +(!sysctl_memory_failure_recovery) +> +(gdb) n +> +1500 p = pfn_to_online_page(pfn); +> +(gdb) l +> +1496 +> +1497 if (!sysctl_memory_failure_recovery) +> +1498 panic("Memory failure on page %lx", pfn); +> +1499 +> +1500 p = pfn_to_online_page(pfn); +> +1501 if (!p) { +> +> +Best Regrades, +> +Shuai +> +> +> +[1] +https://github.com/qemu/qemu/blob/master/docs/system/gdb.rst +Hi, Richard, + +I was wondering that do you have any comments to this? 
+ +Best Regrades, +Shuai + diff --git a/results/classifier/016/debug/32484936 b/results/classifier/016/debug/32484936 new file mode 100644 index 00000000..93b8bf6c --- /dev/null +++ b/results/classifier/016/debug/32484936 @@ -0,0 +1,250 @@ +debug: 0.860 +virtual: 0.676 +files: 0.094 +user-level: 0.045 +hypervisor: 0.042 +x86: 0.035 +TCG: 0.028 +register: 0.024 +operating system: 0.016 +ppc: 0.014 +PID: 0.010 +assembly: 0.009 +i386: 0.009 +semantic: 0.007 +alpha: 0.007 +risc-v: 0.006 +VMM: 0.006 +device: 0.006 +network: 0.005 +arm: 0.004 +graphic: 0.004 +kernel: 0.004 +performance: 0.004 +socket: 0.003 +vnc: 0.003 +KVM: 0.003 +peripherals: 0.002 +boot: 0.002 +architecture: 0.002 +mistranslation: 0.001 +permissions: 0.001 + +[Qemu-devel] [Snapshot Bug?]Qcow2 meta data corruption + +Hi all, +There was a problem about qcow2 image file happened in my serval vms and I could not figure it out, +so have to ask for some help. +Here is the thing: +At first, I found there were some data corruption in a vm, so I did qemu-img check to all my vms. +parts of check report: +3-Leaked cluster 2926229 refcount=1 reference=0 +4-Leaked cluster 3021181 refcount=1 reference=0 +5-Leaked cluster 3021182 refcount=1 reference=0 +6-Leaked cluster 3021183 refcount=1 reference=0 +7-Leaked cluster 3021184 refcount=1 reference=0 +8-ERROR cluster 3102547 refcount=3 reference=4 +9-ERROR cluster 3111536 refcount=3 reference=4 +10-ERROR cluster 3113369 refcount=3 reference=4 +11-ERROR cluster 3235590 refcount=10 reference=11 +12-ERROR cluster 3235591 refcount=10 reference=11 +423-Warning: cluster offset=0xc000c00020000 is after the end of the image file, can't properly check refcounts. +424-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +425-Warning: cluster offset=0xc0001000c0000 is after the end of the image file, can't properly check refcounts. 
+426-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +427-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +428-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +429-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +430-Warning: cluster offset=0xc000c00010000 is after the end of the image file, can't properly check refcounts. +After a futher look in, I found two l2 entries point to the same cluster, and that was found in serval qcow2 image files of different vms. +Like this: +table entry conflict (with our qcow2 check +tool): +a table offset : 0x00000093f7080000 level : 2, l1 table entry 100, l2 table entry 7 +b table offset : 0x00000093f7080000 level : 2, l1 table entry 5, l2 table entry 7 +table entry conflict : +a table offset : 0x00000000a01e0000 level : 2, l1 table entry 100, l2 table entry 19 +b table offset : 0x00000000a01e0000 level : 2, l1 table entry 5, l2 table entry 19 +table entry conflict : +a table offset : 0x00000000a01d0000 level : 2, l1 table entry 100, l2 table entry 18 +b table offset : 0x00000000a01d0000 level : 2, l1 table entry 5, l2 table entry 18 +table entry conflict : +a table offset : 0x00000000a01c0000 level : 2, l1 table entry 100, l2 table entry 17 +b table offset : 0x00000000a01c0000 level : 2, l1 table entry 5, l2 table entry 17 +table entry conflict : +a table offset : 0x00000000a01b0000 level : 2, l1 table entry 100, l2 table entry 16 +b table offset : 0x00000000a01b0000 level : 2, l1 table entry 5, l2 table entry 16 +I think the problem is relate to the snapshot create, delete. But I cant reproduce it . +Can Anyone give a hint about how this happen? +Qemu version 2.0.1, I download the source code and make install it. 
+Qemu parameters: +/usr/bin/kvm -chardev socket,id=qmp,path=/var/run/qemu-server/5855899639838.qmp,server,nowait -mon chardev=qmp,mode=control -vnc :0,websocket,to=200 -enable-kvm -pidfile /var/run/qemu-server/5855899639838.pid -daemonize -name yfMailSvr-200.200.0.14 -smp sockets=1,cores=4 -cpu core2duo,hv_spinlocks=0xffff,hv_relaxed,hv_time,hv_vapic,+sse4.1,+sse4.2,+x2apic,+erms,+smep,+fsgsbase,+f16c,+dca,+pcid,+pdcm,+xtpr,+ht,+ss,+acpi,+ds -nodefaults -vga cirrus -k en-us -boot menu=on,splash-time=8000 -m 8192 -usb -drive if=none,id=drive-ide0,media=cdrom,aio=native -device ide-cd,bus=ide.0,unit=0,drive=drive-ide0,id=ide0 -drive file=/sf/data/36b82a720d3a278001ba904e80c20c13e_ecf4bbbf3e94/images/host-ecf4bbbf3e94/784f3f08532a/yfMailSvr-200.200.0.14.vm/vm-disk-1.qcow2,if=none,id=drive-virtio1,cache=none,aio=native -device virtio-blk-pci,drive=drive-virtio1,id=virtio1,bus=pci.0,addr=0xb -drive file=/sf/data/36b82a720d3a278001ba904e80c20c13e_ecf4bbbf3e94/images/host-ecf4bbbf3e94/784f3f08532a/yfMailSvr-200.200.0.14.vm/vm-disk-2.qcow2,if=none,id=drive-virtio2,cache=none,aio=native -device virtio-blk-pci,drive=drive-virtio2,id=virtio2,bus=pci.0,addr=0xc,bootindex=101 -netdev type=tap,id=net0,ifname=585589963983800,script=/sf/etc/kvm/vtp-bridge,vhost=on,vhostforce=on -device virtio-net-pci,romfile=,mac=FE:FC:FE:F0:AB:BA,netdev=net0,bus=pci.0,addr=0x12,id=net0 -rtc driftfix=slew,clock=rt,base=localtime -global kvm-pit.lost_tick_policy=discard -global PIIX4_PM.disable_s3=1 -global PIIX4_PM.disable_s4=1 +Thanks +Sangfor VT. +leijian + +Hi all, +There was a problem about qcow2 image file happened in my serval vms and I could not figure it out, +so have to ask for some help. +Here is the thing: +At first, I found there were some data corruption in a vm, so I did qemu-img check to all my vms. 
+parts of check report: +3-Leaked cluster 2926229 refcount=1 reference=0 +4-Leaked cluster 3021181 refcount=1 reference=0 +5-Leaked cluster 3021182 refcount=1 reference=0 +6-Leaked cluster 3021183 refcount=1 reference=0 +7-Leaked cluster 3021184 refcount=1 reference=0 +8-ERROR cluster 3102547 refcount=3 reference=4 +9-ERROR cluster 3111536 refcount=3 reference=4 +10-ERROR cluster 3113369 refcount=3 reference=4 +11-ERROR cluster 3235590 refcount=10 reference=11 +12-ERROR cluster 3235591 refcount=10 reference=11 +423-Warning: cluster offset=0xc000c00020000 is after the end of the image file, can't properly check refcounts. +424-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +425-Warning: cluster offset=0xc0001000c0000 is after the end of the image file, can't properly check refcounts. +426-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +427-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +428-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +429-Warning: cluster offset=0xc000c000c0000 is after the end of the image file, can't properly check refcounts. +430-Warning: cluster offset=0xc000c00010000 is after the end of the image file, can't properly check refcounts. +After a futher look in, I found two l2 entries point to the same cluster, and that was found in serval qcow2 image files of different vms. 
+Like this: +table entry conflict (with our qcow2 check +tool): +a table offset : 0x00000093f7080000 level : 2, l1 table entry 100, l2 table entry 7 +b table offset : 0x00000093f7080000 level : 2, l1 table entry 5, l2 table entry 7 +table entry conflict : +a table offset : 0x00000000a01e0000 level : 2, l1 table entry 100, l2 table entry 19 +b table offset : 0x00000000a01e0000 level : 2, l1 table entry 5, l2 table entry 19 +table entry conflict : +a table offset : 0x00000000a01d0000 level : 2, l1 table entry 100, l2 table entry 18 +b table offset : 0x00000000a01d0000 level : 2, l1 table entry 5, l2 table entry 18 +table entry conflict : +a table offset : 0x00000000a01c0000 level : 2, l1 table entry 100, l2 table entry 17 +b table offset : 0x00000000a01c0000 level : 2, l1 table entry 5, l2 table entry 17 +table entry conflict : +a table offset : 0x00000000a01b0000 level : 2, l1 table entry 100, l2 table entry 16 +b table offset : 0x00000000a01b0000 level : 2, l1 table entry 5, l2 table entry 16 +I think the problem is relate to the snapshot create, delete. But I cant reproduce it . +Can Anyone give a hint about how this happen? +Qemu version 2.0.1, I download the source code and make install it. 
+Qemu parameters: +/usr/bin/kvm -chardev socket,id=qmp,path=/var/run/qemu-server/5855899639838.qmp,server,nowait -mon chardev=qmp,mode=control -vnc :0,websocket,to=200 -enable-kvm -pidfile /var/run/qemu-server/5855899639838.pid -daemonize -name yfMailSvr-200.200.0.14 -smp sockets=1,cores=4 -cpu core2duo,hv_spinlocks=0xffff,hv_relaxed,hv_time,hv_vapic,+sse4.1,+sse4.2,+x2apic,+erms,+smep,+fsgsbase,+f16c,+dca,+pcid,+pdcm,+xtpr,+ht,+ss,+acpi,+ds -nodefaults -vga cirrus -k en-us -boot menu=on,splash-time=8000 -m 8192 -usb -drive if=none,id=drive-ide0,media=cdrom,aio=native -device ide-cd,bus=ide.0,unit=0,drive=drive-ide0,id=ide0 -drive file=/sf/data/36b82a720d3a278001ba904e80c20c13e_ecf4bbbf3e94/images/host-ecf4bbbf3e94/784f3f08532a/yfMailSvr-200.200.0.14.vm/vm-disk-1.qcow2,if=none,id=drive-virtio1,cache=none,aio=native -device virtio-blk-pci,drive=drive-virtio1,id=virtio1,bus=pci.0,addr=0xb -drive file=/sf/data/36b82a720d3a278001ba904e80c20c13e_ecf4bbbf3e94/images/host-ecf4bbbf3e94/784f3f08532a/yfMailSvr-200.200.0.14.vm/vm-disk-2.qcow2,if=none,id=drive-virtio2,cache=none,aio=native -device virtio-blk-pci,drive=drive-virtio2,id=virtio2,bus=pci.0,addr=0xc,bootindex=101 -netdev type=tap,id=net0,ifname=585589963983800,script=/sf/etc/kvm/vtp-bridge,vhost=on,vhostforce=on -device virtio-net-pci,romfile=,mac=FE:FC:FE:F0:AB:BA,netdev=net0,bus=pci.0,addr=0x12,id=net0 -rtc driftfix=slew,clock=rt,base=localtime -global kvm-pit.lost_tick_policy=discard -global PIIX4_PM.disable_s3=1 -global PIIX4_PM.disable_s4=1 +Thanks +Sangfor VT. +leijian + +Am 03.04.2015 um 12:04 hat leijian geschrieben: +> +Hi all, +> +> +There was a problem about qcow2 image file happened in my serval vms and I +> +could not figure it out, +> +so have to ask for some help. +> +[...] +> +I think the problem is relate to the snapshot create, delete. But I cant +> +reproduce it . +> +Can Anyone give a hint about how this happen? +How did you create/delete your snapshots? 
+ +More specifically, did you take care to never access your image from +more than one process (except if both are read-only)? It happens +occasionally that people use 'qemu-img snapshot' while the VM is +running. This is wrong and can corrupt the image. + +Kevin + +On 04/07/2015 03:33 AM, Kevin Wolf wrote: +> +More specifically, did you take care to never access your image from +> +more than one process (except if both are read-only)? It happens +> +occasionally that people use 'qemu-img snapshot' while the VM is +> +running. This is wrong and can corrupt the image. +Since this has been done by more than one person, I'm wondering if there +is something we can do in the qcow2 format itself to make it harder for +the casual user to cause corruption. Maybe if we declare some bit or +extension header for an image open for writing, which other readers can +use as a warning ("this image is being actively modified; reading it may +fail"), and other writers can use to deny access ("another process is +already modifying this image"), where a writer should set that bit +before writing anything else in the file, then clear it on exit. Of +course, you'd need a way to override the bit to actively clear it to +recover from the case of a writer dying unexpectedly without resetting +it normally. And it won't help the case of a reader opening the file +first, followed by a writer, where the reader could still get thrown off +track. + +Or maybe we could document in the qcow2 format that all readers and +writers should attempt to obtain the appropriate flock() permissions [or +other appropriate advisory locking scheme] over the file header, so that +cooperating processes that both use advisory locking will know when the +file is in use by another process. 
+ +-- +Eric Blake eblake redhat com +1-919-301-3266 +Libvirt virtualization library +http://libvirt.org +signature.asc +Description: +OpenPGP digital signature + + +I created/deleted the snapshot by using qmp command "snapshot_blkdev_internal"/"snapshot_delete_blkdev_internal", and for avoiding the case you mentioned above, I have added the flock() permission in the qemu_open(). +Here is the test of doing qemu-img snapshot to a running vm: +Diskfile:/sf/data/36c81f660e38b3b001b183da50b477d89_f8bc123b3e74/images/host-f8bc123b3e74/4a8d8728fcdc/Devried30030.vm/vm-disk-1.qcow2 is used! errno=Resource temporarily unavailable +Does the two cluster entry happen to be the same because of the refcount of using cluster decrease to 0 unexpectedly and is allocated again? +If it was not accessing the image from more than one process, any other exceptions I can test for? +Thanks +leijian +From: +Eric Blake +Date: +2015-04-07 23:27 +To: +Kevin Wolf +; +leijian +CC: +qemu-devel +; +stefanha +Subject: +Re: [Qemu-devel] [Snapshot Bug?]Qcow2 meta data +corruption +On 04/07/2015 03:33 AM, Kevin Wolf wrote: +> More specifically, did you take care to never access your image from +> more than one process (except if both are read-only)? It happens +> occasionally that people use 'qemu-img snapshot' while the VM is +> running. This is wrong and can corrupt the image. +Since this has been done by more than one person, I'm wondering if there +is something we can do in the qcow2 format itself to make it harder for +the casual user to cause corruption. Maybe if we declare some bit or +extension header for an image open for writing, which other readers can +use as a warning ("this image is being actively modified; reading it may +fail"), and other writers can use to deny access ("another process is +already modifying this image"), where a writer should set that bit +before writing anything else in the file, then clear it on exit. 
Of +course, you'd need a way to override the bit to actively clear it to +recover from the case of a writer dying unexpectedly without resetting +it normally. And it won't help the case of a reader opening the file +first, followed by a writer, where the reader could still get thrown off +track. +Or maybe we could document in the qcow2 format that all readers and +writers should attempt to obtain the appropriate flock() permissions [or +other appropriate advisory locking scheme] over the file header, so that +cooperating processes that both use advisory locking will know when the +file is in use by another process. +-- +Eric Blake eblake redhat com +1-919-301-3266 +Libvirt virtualization library http://libvirt.org + diff --git a/results/classifier/016/debug/42226390 b/results/classifier/016/debug/42226390 new file mode 100644 index 00000000..889e7ae9 --- /dev/null +++ b/results/classifier/016/debug/42226390 @@ -0,0 +1,214 @@ +debug: 0.971 +kernel: 0.970 +boot: 0.967 +operating system: 0.854 +user-level: 0.441 +TCG: 0.311 +hypervisor: 0.124 +architecture: 0.110 +register: 0.089 +virtual: 0.087 +PID: 0.068 +device: 0.061 +VMM: 0.050 +files: 0.035 +socket: 0.034 +vnc: 0.024 +semantic: 0.020 +performance: 0.020 +risc-v: 0.014 +KVM: 0.012 +arm: 0.008 +assembly: 0.008 +network: 0.006 +peripherals: 0.005 +graphic: 0.002 +permissions: 0.002 +alpha: 0.002 +mistranslation: 0.001 +ppc: 0.001 +x86: 0.001 +i386: 0.000 + +[BUG] AArch64 boot hang with -icount and -smp >1 (iothread locking issue?) + +Hello, + +I am encountering one or more bugs when using -icount and -smp >1 that I am +attempting to sort out. My current theory is that it is an iothread locking +issue. 
+ +I am using a command-line like the following where $kernel is a recent upstream +AArch64 Linux kernel Image (I can provide a binary if that would be helpful - +let me know how is best to post): + + qemu-system-aarch64 \ + -M virt -cpu cortex-a57 -m 1G \ + -nographic \ + -smp 2 \ + -icount 0 \ + -kernel $kernel + +For any/all of the symptoms described below, they seem to disappear when I +either remove `-icount 0` or change smp to `-smp 1`. In other words, it is the +combination of `-smp >1` and `-icount` which triggers what I'm seeing. + +I am seeing two different (but seemingly related) behaviors. The first (and +what I originally started debugging) shows up as a boot hang. When booting +using the above command after Peter's "icount: Take iothread lock when running +QEMU timers" patch [1], The kernel boots for a while and then hangs after: + +> +...snip... +> +[ 0.010764] Serial: AMBA PL011 UART driver +> +[ 0.016334] 9000000.pl011: ttyAMA0 at MMIO 0x9000000 (irq = 13, base_baud +> += 0) is a PL011 rev1 +> +[ 0.016907] printk: console [ttyAMA0] enabled +> +[ 0.017624] KASLR enabled +> +[ 0.031986] HugeTLB: registered 16.0 GiB page size, pre-allocated 0 pages +> +[ 0.031986] HugeTLB: 16320 KiB vmemmap can be freed for a 16.0 GiB page +> +[ 0.031986] HugeTLB: registered 512 MiB page size, pre-allocated 0 pages +> +[ 0.031986] HugeTLB: 448 KiB vmemmap can be freed for a 512 MiB page +> +[ 0.031986] HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages +> +[ 0.031986] HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page +When it hangs here, I drop into QEMU's console, attach to the gdbserver, and it +always reports that it is at address 0xffff800008dc42e8 (as shown below from an +objdump of the vmlinux). 
I note this is in the middle of messing with timer +system registers - which makes me suspect we're attempting to take the iothread +lock when its already held: + +> +ffff800008dc42b8 <arch_timer_set_next_event_virt>: +> +ffff800008dc42b8: d503201f nop +> +ffff800008dc42bc: d503201f nop +> +ffff800008dc42c0: d503233f paciasp +> +ffff800008dc42c4: d53be321 mrs x1, cntv_ctl_el0 +> +ffff800008dc42c8: 32000021 orr w1, w1, #0x1 +> +ffff800008dc42cc: d5033fdf isb +> +ffff800008dc42d0: d53be042 mrs x2, cntvct_el0 +> +ffff800008dc42d4: ca020043 eor x3, x2, x2 +> +ffff800008dc42d8: 8b2363e3 add x3, sp, x3 +> +ffff800008dc42dc: f940007f ldr xzr, [x3] +> +ffff800008dc42e0: 8b020000 add x0, x0, x2 +> +ffff800008dc42e4: d51be340 msr cntv_cval_el0, x0 +> +* ffff800008dc42e8: 927ef820 and x0, x1, #0xfffffffffffffffd +> +ffff800008dc42ec: d51be320 msr cntv_ctl_el0, x0 +> +ffff800008dc42f0: d5033fdf isb +> +ffff800008dc42f4: 52800000 mov w0, #0x0 +> +// #0 +> +ffff800008dc42f8: d50323bf autiasp +> +ffff800008dc42fc: d65f03c0 ret +The second behavior is that prior to Peter's "icount: Take iothread lock when +running QEMU timers" patch [1], I observe the following message (same command +as above): + +> +ERROR:../accel/tcg/tcg-accel-ops.c:79:tcg_handle_interrupt: assertion failed: +> +(qemu_mutex_iothread_locked()) +> +Aborted (core dumped) +This is the same behavior described in Gitlab issue 1130 [0] and addressed by +[1]. I bisected the appearance of this assertion, and found it was introduced +by Pavel's "replay: rewrite async event handling" commit [2]. Commits prior to +that one boot successfully (neither assertions nor hangs) with `-icount 0 -smp +2`. + +I've looked over these two commits ([1], [2]), but it is not obvious to me +how/why they might be interacting to produce the boot hangs I'm seeing and +I welcome any help investigating further. + +Thanks! 
+ +-Aaron Lindsay + +[0] - +https://gitlab.com/qemu-project/qemu/-/issues/1130 +[1] - +https://gitlab.com/qemu-project/qemu/-/commit/c7f26ded6d5065e4116f630f6a490b55f6c5f58e +[2] - +https://gitlab.com/qemu-project/qemu/-/commit/60618e2d77691e44bb78e23b2b0cf07b5c405e56 + +On Fri, 21 Oct 2022 at 16:48, Aaron Lindsay +<aaron@os.amperecomputing.com> wrote: +> +> +Hello, +> +> +I am encountering one or more bugs when using -icount and -smp >1 that I am +> +attempting to sort out. My current theory is that it is an iothread locking +> +issue. +Weird coincidence, that is a bug that's been in the tree for months +but was only reported to me earlier this week. Try reverting +commit a82fd5a4ec24d923ff1e -- that should fix it. +CAFEAcA_i8x00hD-4XX18ySLNbCB6ds1-DSazVb4yDnF8skjd9A@mail.gmail.com +/">https://lore.kernel.org/qemu-devel/ +CAFEAcA_i8x00hD-4XX18ySLNbCB6ds1-DSazVb4yDnF8skjd9A@mail.gmail.com +/ +has the explanation. + +thanks +-- PMM + +On Oct 21 17:00, Peter Maydell wrote: +> +On Fri, 21 Oct 2022 at 16:48, Aaron Lindsay +> +<aaron@os.amperecomputing.com> wrote: +> +> +> +> Hello, +> +> +> +> I am encountering one or more bugs when using -icount and -smp >1 that I am +> +> attempting to sort out. My current theory is that it is an iothread locking +> +> issue. +> +> +Weird coincidence, that is a bug that's been in the tree for months +> +but was only reported to me earlier this week. Try reverting +> +commit a82fd5a4ec24d923ff1e -- that should fix it. +I can confirm that reverting a82fd5a4ec24d923ff1e fixes it for me. +Thanks for the help and fast response! 
+ +-Aaron + diff --git a/results/classifier/016/debug/55247116 b/results/classifier/016/debug/55247116 new file mode 100644 index 00000000..169b851f --- /dev/null +++ b/results/classifier/016/debug/55247116 @@ -0,0 +1,1337 @@ +debug: 0.906 +virtual: 0.351 +register: 0.235 +x86: 0.100 +network: 0.086 +TCG: 0.069 +files: 0.067 +performance: 0.064 +hypervisor: 0.058 +operating system: 0.053 +i386: 0.035 +ppc: 0.033 +kernel: 0.032 +PID: 0.031 +alpha: 0.031 +arm: 0.018 +socket: 0.017 +semantic: 0.016 +architecture: 0.016 +boot: 0.015 +VMM: 0.015 +user-level: 0.012 +assembly: 0.011 +device: 0.011 +KVM: 0.010 +permissions: 0.009 +vnc: 0.009 +risc-v: 0.005 +peripherals: 0.005 +graphic: 0.003 +mistranslation: 0.001 + +[Qemu-devel] [RFC/BUG] xen-mapcache: buggy invalidate map cache? + +Hi, + +In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +instead of first level entry (if map to rom other than guest memory +comes first), while in xen_invalidate_map_cache(), when VM ballooned +out memory, qemu did not invalidate cache entries in linked +list(entry->next), so when VM balloon back in memory, gfns probably +mapped to different mfns, thus if guest asks device to DMA to these +GPA, qemu may DMA to stale MFNs. + +So I think in xen_invalidate_map_cache() linked lists should also be +checked and invalidated. + +Whatâs your opinion? Is this a bug? Is my analyze correct? + +On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +Hi, +> +> +In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +instead of first level entry (if map to rom other than guest memory +> +comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +out memory, qemu did not invalidate cache entries in linked +> +list(entry->next), so when VM balloon back in memory, gfns probably +> +mapped to different mfns, thus if guest asks device to DMA to these +> +GPA, qemu may DMA to stale MFNs. 
+> +> +So I think in xen_invalidate_map_cache() linked lists should also be +> +checked and invalidated. +> +> +Whatâs your opinion? Is this a bug? Is my analyze correct? +Added Jun Nakajima and Alexander Graf + +On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +> Hi, +> +> +> +> In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +> instead of first level entry (if map to rom other than guest memory +> +> comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +> out memory, qemu did not invalidate cache entries in linked +> +> list(entry->next), so when VM balloon back in memory, gfns probably +> +> mapped to different mfns, thus if guest asks device to DMA to these +> +> GPA, qemu may DMA to stale MFNs. +> +> +> +> So I think in xen_invalidate_map_cache() linked lists should also be +> +> checked and invalidated. +> +> +> +> Whatâs your opinion? Is this a bug? Is my analyze correct? +> +> +Added Jun Nakajima and Alexander Graf +And correct Stefano Stabellini's email address. + +On Mon, 10 Apr 2017 00:36:02 +0800 +hrg <address@hidden> wrote: + +Hi, + +> +On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +>> Hi, +> +>> +> +>> In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +>> instead of first level entry (if map to rom other than guest memory +> +>> comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +>> out memory, qemu did not invalidate cache entries in linked +> +>> list(entry->next), so when VM balloon back in memory, gfns probably +> +>> mapped to different mfns, thus if guest asks device to DMA to these +> +>> GPA, qemu may DMA to stale MFNs. +> +>> +> +>> So I think in xen_invalidate_map_cache() linked lists should also be +> +>> checked and invalidated. +> +>> +> +>> Whatâs your opinion? Is this a bug? 
Is my analyze correct? +> +> +> +> Added Jun Nakajima and Alexander Graf +> +And correct Stefano Stabellini's email address. +There is a real issue with the xen-mapcache corruption in fact. I encountered +it a few months ago while experimenting with Q35 support on Xen. Q35 emulation +uses an AHCI controller by default, along with NCQ mode enabled. The issue can +be (somewhat) easily reproduced there, though using a normal i440 emulation +might possibly allow to reproduce the issue as well, using a dedicated test +code from a guest side. In case of Q35+NCQ the issue can be reproduced "as is". + +The issue occurs when a guest domain performs an intensive disk I/O, ex. while +guest OS booting. QEMU crashes with "Bad ram offset 980aa000" +message logged, where the address is different each time. The hard thing with +this issue is that it has a very low reproducibility rate. + +The corruption happens when there are multiple I/O commands in the NCQ queue. +So there are overlapping emulated DMA operations in flight and QEMU uses a +sequence of mapcache actions which can be executed in the "wrong" order thus +leading to an inconsistent xen-mapcache - so a bad address from the wrong +entry is returned. + +The bad thing with this issue is that QEMU crash due to "Bad ram offset" +appearance is a relatively good situation in the sense that this is a caught +error. But there might be a much worse (artificial) situation where the returned +address looks valid but points to a different mapped memory. + +The fix itself is not hard (ex. an additional checked field in MapCacheEntry), +but there is a need of some reliable way to test it considering the low +reproducibility rate. 
+ +Regards, +Alex + +On Mon, 10 Apr 2017, hrg wrote: +> +On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +>> Hi, +> +>> +> +>> In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +>> instead of first level entry (if map to rom other than guest memory +> +>> comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +>> out memory, qemu did not invalidate cache entries in linked +> +>> list(entry->next), so when VM balloon back in memory, gfns probably +> +>> mapped to different mfns, thus if guest asks device to DMA to these +> +>> GPA, qemu may DMA to stale MFNs. +> +>> +> +>> So I think in xen_invalidate_map_cache() linked lists should also be +> +>> checked and invalidated. +> +>> +> +>> Whatâs your opinion? Is this a bug? Is my analyze correct? +Yes, you are right. We need to go through the list for each element of +the array in xen_invalidate_map_cache. Can you come up with a patch? + +On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +On Mon, 10 Apr 2017, hrg wrote: +> +> On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +> >> Hi, +> +> >> +> +> >> In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +> >> instead of first level entry (if map to rom other than guest memory +> +> >> comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +> >> out memory, qemu did not invalidate cache entries in linked +> +> >> list(entry->next), so when VM balloon back in memory, gfns probably +> +> >> mapped to different mfns, thus if guest asks device to DMA to these +> +> >> GPA, qemu may DMA to stale MFNs. +> +> >> +> +> >> So I think in xen_invalidate_map_cache() linked lists should also be +> +> >> checked and invalidated. +> +> >> +> +> >> Whatâs your opinion? Is this a bug? Is my analyze correct? +> +> +Yes, you are right. 
We need to go through the list for each element of +> +the array in xen_invalidate_map_cache. Can you come up with a patch? +I spoke too soon. In the regular case there should be no locked mappings +when xen_invalidate_map_cache is called (see the DPRINTF warning at the +beginning of the functions). Without locked mappings, there should never +be more than one element in each list (see xen_map_cache_unlocked: +entry->lock == true is a necessary condition to append a new entry to +the list, otherwise it is just remapped). + +Can you confirm that what you are seeing are locked mappings +when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +by turning it into a printf or by defininig MAPCACHE_DEBUG. + +On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +<address@hidden> wrote: +> +On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +> On Mon, 10 Apr 2017, hrg wrote: +> +> > On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> > > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +> > >> Hi, +> +> > >> +> +> > >> In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +> > >> instead of first level entry (if map to rom other than guest memory +> +> > >> comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +> > >> out memory, qemu did not invalidate cache entries in linked +> +> > >> list(entry->next), so when VM balloon back in memory, gfns probably +> +> > >> mapped to different mfns, thus if guest asks device to DMA to these +> +> > >> GPA, qemu may DMA to stale MFNs. +> +> > >> +> +> > >> So I think in xen_invalidate_map_cache() linked lists should also be +> +> > >> checked and invalidated. +> +> > >> +> +> > >> Whatâs your opinion? Is this a bug? Is my analyze correct? +> +> +> +> Yes, you are right. We need to go through the list for each element of +> +> the array in xen_invalidate_map_cache. Can you come up with a patch? +> +> +I spoke too soon. 
In the regular case there should be no locked mappings +> +when xen_invalidate_map_cache is called (see the DPRINTF warning at the +> +beginning of the functions). Without locked mappings, there should never +> +be more than one element in each list (see xen_map_cache_unlocked: +> +entry->lock == true is a necessary condition to append a new entry to +> +the list, otherwise it is just remapped). +> +> +Can you confirm that what you are seeing are locked mappings +> +when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +> +by turning it into a printf or by defininig MAPCACHE_DEBUG. +In fact, I think the DPRINTF above is incorrect too. In +pci_add_option_rom(), rtl8139 rom is locked mapped in +pci_add_option_rom->memory_region_get_ram_ptr (after +memory_region_init_ram). So actually I think we should remove the +DPRINTF warning as it is normal. + +On Tue, 11 Apr 2017, hrg wrote: +> +On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +> +<address@hidden> wrote: +> +> On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +>> On Mon, 10 Apr 2017, hrg wrote: +> +>> > On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +>> > > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +>> > >> Hi, +> +>> > >> +> +>> > >> In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +> +>> > >> instead of first level entry (if map to rom other than guest memory +> +>> > >> comes first), while in xen_invalidate_map_cache(), when VM ballooned +> +>> > >> out memory, qemu did not invalidate cache entries in linked +> +>> > >> list(entry->next), so when VM balloon back in memory, gfns probably +> +>> > >> mapped to different mfns, thus if guest asks device to DMA to these +> +>> > >> GPA, qemu may DMA to stale MFNs. +> +>> > >> +> +>> > >> So I think in xen_invalidate_map_cache() linked lists should also be +> +>> > >> checked and invalidated. +> +>> > >> +> +>> > >> Whatâs your opinion? Is this a bug? Is my analyze correct? 
+> +>> +> +>> Yes, you are right. We need to go through the list for each element of +> +>> the array in xen_invalidate_map_cache. Can you come up with a patch? +> +> +> +> I spoke too soon. In the regular case there should be no locked mappings +> +> when xen_invalidate_map_cache is called (see the DPRINTF warning at the +> +> beginning of the functions). Without locked mappings, there should never +> +> be more than one element in each list (see xen_map_cache_unlocked: +> +> entry->lock == true is a necessary condition to append a new entry to +> +> the list, otherwise it is just remapped). +> +> +> +> Can you confirm that what you are seeing are locked mappings +> +> when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +> +> by turning it into a printf or by defininig MAPCACHE_DEBUG. +> +> +In fact, I think the DPRINTF above is incorrect too. In +> +pci_add_option_rom(), rtl8139 rom is locked mapped in +> +pci_add_option_rom->memory_region_get_ram_ptr (after +> +memory_region_init_ram). So actually I think we should remove the +> +DPRINTF warning as it is normal. +Let me explain why the DPRINTF warning is there: emulated dma operations +can involve locked mappings. Once a dma operation completes, the related +mapping is unlocked and can be safely destroyed. But if we destroy a +locked mapping in xen_invalidate_map_cache, while a dma is still +ongoing, QEMU will crash. We cannot handle that case. + +However, the scenario you described is different. It has nothing to do +with DMA. It looks like pci_add_option_rom calls +memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +locked mapping and it is never unlocked or destroyed. + +It looks like "ptr" is not used after pci_add_option_rom returns. Does +the append patch fix the problem you are seeing? For the proper fix, I +think we probably need some sort of memory_region_unmap wrapper or maybe +a call to address_space_unmap. 
+ + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index e6b08e1..04f98b7 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2242,6 +2242,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool +is_default_rom, + } + + pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); ++ xen_invalidate_map_cache_entry(ptr); + } + + static void pci_del_option_rom(PCIDevice *pdev) + +On Tue, 11 Apr 2017 15:32:09 -0700 (PDT) +Stefano Stabellini <address@hidden> wrote: + +> +On Tue, 11 Apr 2017, hrg wrote: +> +> On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +> +> <address@hidden> wrote: +> +> > On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +> >> On Mon, 10 Apr 2017, hrg wrote: +> +> >> > On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> >> > > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +> >> > >> Hi, +> +> >> > >> +> +> >> > >> In xen_map_cache_unlocked(), map to guest memory maybe in +> +> >> > >> entry->next instead of first level entry (if map to rom other than +> +> >> > >> guest memory comes first), while in xen_invalidate_map_cache(), +> +> >> > >> when VM ballooned out memory, qemu did not invalidate cache entries +> +> >> > >> in linked list(entry->next), so when VM balloon back in memory, +> +> >> > >> gfns probably mapped to different mfns, thus if guest asks device +> +> >> > >> to DMA to these GPA, qemu may DMA to stale MFNs. +> +> >> > >> +> +> >> > >> So I think in xen_invalidate_map_cache() linked lists should also be +> +> >> > >> checked and invalidated. +> +> >> > >> +> +> >> > >> Whatâs your opinion? Is this a bug? Is my analyze correct? +> +> >> +> +> >> Yes, you are right. We need to go through the list for each element of +> +> >> the array in xen_invalidate_map_cache. Can you come up with a patch? +> +> > +> +> > I spoke too soon. In the regular case there should be no locked mappings +> +> > when xen_invalidate_map_cache is called (see the DPRINTF warning at the +> +> > beginning of the functions). 
Without locked mappings, there should never +> +> > be more than one element in each list (see xen_map_cache_unlocked: +> +> > entry->lock == true is a necessary condition to append a new entry to +> +> > the list, otherwise it is just remapped). +> +> > +> +> > Can you confirm that what you are seeing are locked mappings +> +> > when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +> +> > by turning it into a printf or by defininig MAPCACHE_DEBUG. +> +> +> +> In fact, I think the DPRINTF above is incorrect too. In +> +> pci_add_option_rom(), rtl8139 rom is locked mapped in +> +> pci_add_option_rom->memory_region_get_ram_ptr (after +> +> memory_region_init_ram). So actually I think we should remove the +> +> DPRINTF warning as it is normal. +> +> +Let me explain why the DPRINTF warning is there: emulated dma operations +> +can involve locked mappings. Once a dma operation completes, the related +> +mapping is unlocked and can be safely destroyed. But if we destroy a +> +locked mapping in xen_invalidate_map_cache, while a dma is still +> +ongoing, QEMU will crash. We cannot handle that case. +> +> +However, the scenario you described is different. It has nothing to do +> +with DMA. It looks like pci_add_option_rom calls +> +memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +> +locked mapping and it is never unlocked or destroyed. +> +> +It looks like "ptr" is not used after pci_add_option_rom returns. Does +> +the append patch fix the problem you are seeing? For the proper fix, I +> +think we probably need some sort of memory_region_unmap wrapper or maybe +> +a call to address_space_unmap. +Hmm, for some reason my message to the Xen-devel list got rejected but was sent +to Qemu-devel instead, without any notice. Sorry if I'm missing something +obvious as a list newbie. 
+ +Stefano, hrg, + +There is an issue with inconsistency between the list of normal MapCacheEntry's +and their 'reverse' counterparts - MapCacheRev's in locked_entries. +When bad situation happens, there are multiple (locked) MapCacheEntry +entries in the bucket's linked list along with a number of MapCacheRev's. And +when it comes to a reverse lookup, xen-mapcache picks the wrong entry from the +first list and calculates a wrong pointer from it which may then be caught with +the "Bad RAM offset" check (or not). Mapcache invalidation might be related to +this issue as well I think. + +I'll try to provide a test code which can reproduce the issue from the +guest side using an emulated IDE controller, though it's much simpler to achieve +this result with an AHCI controller using multiple NCQ I/O commands. So far I've +seen this issue only with Windows 7 (and above) guest on AHCI, but any block I/O +DMA should be enough I think. + +On 2017/4/12 14:17, Alexey G wrote: +On Tue, 11 Apr 2017 15:32:09 -0700 (PDT) +Stefano Stabellini <address@hidden> wrote: +On Tue, 11 Apr 2017, hrg wrote: +On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +<address@hidden> wrote: +On Mon, 10 Apr 2017, Stefano Stabellini wrote: +On Mon, 10 Apr 2017, hrg wrote: +On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +Hi, + +In xen_map_cache_unlocked(), map to guest memory maybe in +entry->next instead of first level entry (if map to rom other than +guest memory comes first), while in xen_invalidate_map_cache(), +when VM ballooned out memory, qemu did not invalidate cache entries +in linked list(entry->next), so when VM balloon back in memory, +gfns probably mapped to different mfns, thus if guest asks device +to DMA to these GPA, qemu may DMA to stale MFNs. + +So I think in xen_invalidate_map_cache() linked lists should also be +checked and invalidated. + +Whatâs your opinion? Is this a bug? Is my analyze correct? 
+Yes, you are right. We need to go through the list for each element of +the array in xen_invalidate_map_cache. Can you come up with a patch? +I spoke too soon. In the regular case there should be no locked mappings +when xen_invalidate_map_cache is called (see the DPRINTF warning at the +beginning of the functions). Without locked mappings, there should never +be more than one element in each list (see xen_map_cache_unlocked: +entry->lock == true is a necessary condition to append a new entry to +the list, otherwise it is just remapped). + +Can you confirm that what you are seeing are locked mappings +when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +by turning it into a printf or by defininig MAPCACHE_DEBUG. +In fact, I think the DPRINTF above is incorrect too. In +pci_add_option_rom(), rtl8139 rom is locked mapped in +pci_add_option_rom->memory_region_get_ram_ptr (after +memory_region_init_ram). So actually I think we should remove the +DPRINTF warning as it is normal. +Let me explain why the DPRINTF warning is there: emulated dma operations +can involve locked mappings. Once a dma operation completes, the related +mapping is unlocked and can be safely destroyed. But if we destroy a +locked mapping in xen_invalidate_map_cache, while a dma is still +ongoing, QEMU will crash. We cannot handle that case. + +However, the scenario you described is different. It has nothing to do +with DMA. It looks like pci_add_option_rom calls +memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +locked mapping and it is never unlocked or destroyed. + +It looks like "ptr" is not used after pci_add_option_rom returns. Does +the append patch fix the problem you are seeing? For the proper fix, I +think we probably need some sort of memory_region_unmap wrapper or maybe +a call to address_space_unmap. +Hmm, for some reason my message to the Xen-devel list got rejected but was sent +to Qemu-devel instead, without any notice. 
Sorry if I'm missing something +obvious as a list newbie. + +Stefano, hrg, + +There is an issue with inconsistency between the list of normal MapCacheEntry's +and their 'reverse' counterparts - MapCacheRev's in locked_entries. +When bad situation happens, there are multiple (locked) MapCacheEntry +entries in the bucket's linked list along with a number of MapCacheRev's. And +when it comes to a reverse lookup, xen-mapcache picks the wrong entry from the +first list and calculates a wrong pointer from it which may then be caught with +the "Bad RAM offset" check (or not). Mapcache invalidation might be related to +this issue as well I think. + +I'll try to provide a test code which can reproduce the issue from the +guest side using an emulated IDE controller, though it's much simpler to achieve +this result with an AHCI controller using multiple NCQ I/O commands. So far I've +seen this issue only with Windows 7 (and above) guest on AHCI, but any block I/O +DMA should be enough I think. +Yes, I think there may be other bugs lurking, considering the complexity, +though we need to reproduce it if we want to delve into it. 
+ +On Wed, 12 Apr 2017, Alexey G wrote: +> +On Tue, 11 Apr 2017 15:32:09 -0700 (PDT) +> +Stefano Stabellini <address@hidden> wrote: +> +> +> On Tue, 11 Apr 2017, hrg wrote: +> +> > On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +> +> > <address@hidden> wrote: +> +> > > On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +> > >> On Mon, 10 Apr 2017, hrg wrote: +> +> > >> > On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> > >> > > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +> > >> > >> Hi, +> +> > >> > >> +> +> > >> > >> In xen_map_cache_unlocked(), map to guest memory maybe in +> +> > >> > >> entry->next instead of first level entry (if map to rom other than +> +> > >> > >> guest memory comes first), while in xen_invalidate_map_cache(), +> +> > >> > >> when VM ballooned out memory, qemu did not invalidate cache +> +> > >> > >> entries +> +> > >> > >> in linked list(entry->next), so when VM balloon back in memory, +> +> > >> > >> gfns probably mapped to different mfns, thus if guest asks device +> +> > >> > >> to DMA to these GPA, qemu may DMA to stale MFNs. +> +> > >> > >> +> +> > >> > >> So I think in xen_invalidate_map_cache() linked lists should also +> +> > >> > >> be +> +> > >> > >> checked and invalidated. +> +> > >> > >> +> +> > >> > >> Whatâs your opinion? Is this a bug? Is my analyze correct? +> +> > >> +> +> > >> Yes, you are right. We need to go through the list for each element of +> +> > >> the array in xen_invalidate_map_cache. Can you come up with a patch? +> +> > > +> +> > > I spoke too soon. In the regular case there should be no locked mappings +> +> > > when xen_invalidate_map_cache is called (see the DPRINTF warning at the +> +> > > beginning of the functions). 
Without locked mappings, there should never +> +> > > be more than one element in each list (see xen_map_cache_unlocked: +> +> > > entry->lock == true is a necessary condition to append a new entry to +> +> > > the list, otherwise it is just remapped). +> +> > > +> +> > > Can you confirm that what you are seeing are locked mappings +> +> > > when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +> +> > > by turning it into a printf or by defininig MAPCACHE_DEBUG. +> +> > +> +> > In fact, I think the DPRINTF above is incorrect too. In +> +> > pci_add_option_rom(), rtl8139 rom is locked mapped in +> +> > pci_add_option_rom->memory_region_get_ram_ptr (after +> +> > memory_region_init_ram). So actually I think we should remove the +> +> > DPRINTF warning as it is normal. +> +> +> +> Let me explain why the DPRINTF warning is there: emulated dma operations +> +> can involve locked mappings. Once a dma operation completes, the related +> +> mapping is unlocked and can be safely destroyed. But if we destroy a +> +> locked mapping in xen_invalidate_map_cache, while a dma is still +> +> ongoing, QEMU will crash. We cannot handle that case. +> +> +> +> However, the scenario you described is different. It has nothing to do +> +> with DMA. It looks like pci_add_option_rom calls +> +> memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +> +> locked mapping and it is never unlocked or destroyed. +> +> +> +> It looks like "ptr" is not used after pci_add_option_rom returns. Does +> +> the append patch fix the problem you are seeing? For the proper fix, I +> +> think we probably need some sort of memory_region_unmap wrapper or maybe +> +> a call to address_space_unmap. +> +> +Hmm, for some reason my message to the Xen-devel list got rejected but was +> +sent +> +to Qemu-devel instead, without any notice. Sorry if I'm missing something +> +obvious as a list newbie. 
+> +> +Stefano, hrg, +> +> +There is an issue with inconsistency between the list of normal +> +MapCacheEntry's +> +and their 'reverse' counterparts - MapCacheRev's in locked_entries. +> +When bad situation happens, there are multiple (locked) MapCacheEntry +> +entries in the bucket's linked list along with a number of MapCacheRev's. And +> +when it comes to a reverse lookup, xen-mapcache picks the wrong entry from the +> +first list and calculates a wrong pointer from it which may then be caught +> +with +> +the "Bad RAM offset" check (or not). Mapcache invalidation might be related to +> +this issue as well I think. +> +> +I'll try to provide a test code which can reproduce the issue from the +> +guest side using an emulated IDE controller, though it's much simpler to +> +achieve +> +this result with an AHCI controller using multiple NCQ I/O commands. So far +> +I've +> +seen this issue only with Windows 7 (and above) guest on AHCI, but any block +> +I/O +> +DMA should be enough I think. +That would be helpful. Please see if you can reproduce it after fixing +the other issue ( +http://marc.info/?l=qemu-devel&m=149195042500707&w=2 +). + +On 2017/4/12 6:32, Stefano Stabellini wrote: +On Tue, 11 Apr 2017, hrg wrote: +On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +<address@hidden> wrote: +On Mon, 10 Apr 2017, Stefano Stabellini wrote: +On Mon, 10 Apr 2017, hrg wrote: +On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +Hi, + +In xen_map_cache_unlocked(), map to guest memory maybe in entry->next +instead of first level entry (if map to rom other than guest memory +comes first), while in xen_invalidate_map_cache(), when VM ballooned +out memory, qemu did not invalidate cache entries in linked +list(entry->next), so when VM balloon back in memory, gfns probably +mapped to different mfns, thus if guest asks device to DMA to these +GPA, qemu may DMA to stale MFNs. 
+ +So I think in xen_invalidate_map_cache() linked lists should also be +checked and invalidated. + +Whatâs your opinion? Is this a bug? Is my analyze correct? +Yes, you are right. We need to go through the list for each element of +the array in xen_invalidate_map_cache. Can you come up with a patch? +I spoke too soon. In the regular case there should be no locked mappings +when xen_invalidate_map_cache is called (see the DPRINTF warning at the +beginning of the functions). Without locked mappings, there should never +be more than one element in each list (see xen_map_cache_unlocked: +entry->lock == true is a necessary condition to append a new entry to +the list, otherwise it is just remapped). + +Can you confirm that what you are seeing are locked mappings +when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +by turning it into a printf or by defininig MAPCACHE_DEBUG. +In fact, I think the DPRINTF above is incorrect too. In +pci_add_option_rom(), rtl8139 rom is locked mapped in +pci_add_option_rom->memory_region_get_ram_ptr (after +memory_region_init_ram). So actually I think we should remove the +DPRINTF warning as it is normal. +Let me explain why the DPRINTF warning is there: emulated dma operations +can involve locked mappings. Once a dma operation completes, the related +mapping is unlocked and can be safely destroyed. But if we destroy a +locked mapping in xen_invalidate_map_cache, while a dma is still +ongoing, QEMU will crash. We cannot handle that case. + +However, the scenario you described is different. It has nothing to do +with DMA. It looks like pci_add_option_rom calls +memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +locked mapping and it is never unlocked or destroyed. + +It looks like "ptr" is not used after pci_add_option_rom returns. Does +the append patch fix the problem you are seeing? 
For the proper fix, I +think we probably need some sort of memory_region_unmap wrapper or maybe +a call to address_space_unmap. +Yes, I think so, maybe this is the proper way to fix this. +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index e6b08e1..04f98b7 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2242,6 +2242,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool +is_default_rom, + } +pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); ++ xen_invalidate_map_cache_entry(ptr); + } +static void pci_del_option_rom(PCIDevice *pdev) + +On Wed, 12 Apr 2017, Herongguang (Stephen) wrote: +> +On 2017/4/12 6:32, Stefano Stabellini wrote: +> +> On Tue, 11 Apr 2017, hrg wrote: +> +> > On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +> +> > <address@hidden> wrote: +> +> > > On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +> > > > On Mon, 10 Apr 2017, hrg wrote: +> +> > > > > On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +> +> > > > > > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +> +> > > > > > > Hi, +> +> > > > > > > +> +> > > > > > > In xen_map_cache_unlocked(), map to guest memory maybe in +> +> > > > > > > entry->next +> +> > > > > > > instead of first level entry (if map to rom other than guest +> +> > > > > > > memory +> +> > > > > > > comes first), while in xen_invalidate_map_cache(), when VM +> +> > > > > > > ballooned +> +> > > > > > > out memory, qemu did not invalidate cache entries in linked +> +> > > > > > > list(entry->next), so when VM balloon back in memory, gfns +> +> > > > > > > probably +> +> > > > > > > mapped to different mfns, thus if guest asks device to DMA to +> +> > > > > > > these +> +> > > > > > > GPA, qemu may DMA to stale MFNs. +> +> > > > > > > +> +> > > > > > > So I think in xen_invalidate_map_cache() linked lists should +> +> > > > > > > also be +> +> > > > > > > checked and invalidated. +> +> > > > > > > +> +> > > > > > > Whatâs your opinion? Is this a bug? Is my analyze correct? 
+> +> > > > Yes, you are right. We need to go through the list for each element of +> +> > > > the array in xen_invalidate_map_cache. Can you come up with a patch? +> +> > > I spoke too soon. In the regular case there should be no locked mappings +> +> > > when xen_invalidate_map_cache is called (see the DPRINTF warning at the +> +> > > beginning of the functions). Without locked mappings, there should never +> +> > > be more than one element in each list (see xen_map_cache_unlocked: +> +> > > entry->lock == true is a necessary condition to append a new entry to +> +> > > the list, otherwise it is just remapped). +> +> > > +> +> > > Can you confirm that what you are seeing are locked mappings +> +> > > when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +> +> > > by turning it into a printf or by defininig MAPCACHE_DEBUG. +> +> > In fact, I think the DPRINTF above is incorrect too. In +> +> > pci_add_option_rom(), rtl8139 rom is locked mapped in +> +> > pci_add_option_rom->memory_region_get_ram_ptr (after +> +> > memory_region_init_ram). So actually I think we should remove the +> +> > DPRINTF warning as it is normal. +> +> Let me explain why the DPRINTF warning is there: emulated dma operations +> +> can involve locked mappings. Once a dma operation completes, the related +> +> mapping is unlocked and can be safely destroyed. But if we destroy a +> +> locked mapping in xen_invalidate_map_cache, while a dma is still +> +> ongoing, QEMU will crash. We cannot handle that case. +> +> +> +> However, the scenario you described is different. It has nothing to do +> +> with DMA. It looks like pci_add_option_rom calls +> +> memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +> +> locked mapping and it is never unlocked or destroyed. +> +> +> +> It looks like "ptr" is not used after pci_add_option_rom returns. Does +> +> the append patch fix the problem you are seeing? 
For the proper fix, I +> +> think we probably need some sort of memory_region_unmap wrapper or maybe +> +> a call to address_space_unmap. +> +> +Yes, I think so, maybe this is the proper way to fix this. +Would you be up for sending a proper patch and testing it? We cannot call +xen_invalidate_map_cache_entry directly from pci.c though, it would need +to be one of the other functions like address_space_unmap for example. + + +> +> diff --git a/hw/pci/pci.c b/hw/pci/pci.c +> +> index e6b08e1..04f98b7 100644 +> +> --- a/hw/pci/pci.c +> +> +++ b/hw/pci/pci.c +> +> @@ -2242,6 +2242,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool +> +> is_default_rom, +> +> } +> +> pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); +> +> + xen_invalidate_map_cache_entry(ptr); +> +> } +> +> static void pci_del_option_rom(PCIDevice *pdev) + +On 2017/4/13 7:51, Stefano Stabellini wrote: +On Wed, 12 Apr 2017, Herongguang (Stephen) wrote: +On 2017/4/12 6:32, Stefano Stabellini wrote: +On Tue, 11 Apr 2017, hrg wrote: +On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +<address@hidden> wrote: +On Mon, 10 Apr 2017, Stefano Stabellini wrote: +On Mon, 10 Apr 2017, hrg wrote: +On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> wrote: +On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> wrote: +Hi, + +In xen_map_cache_unlocked(), map to guest memory maybe in +entry->next +instead of first level entry (if map to rom other than guest +memory +comes first), while in xen_invalidate_map_cache(), when VM +ballooned +out memory, qemu did not invalidate cache entries in linked +list(entry->next), so when VM balloon back in memory, gfns +probably +mapped to different mfns, thus if guest asks device to DMA to +these +GPA, qemu may DMA to stale MFNs. + +So I think in xen_invalidate_map_cache() linked lists should +also be +checked and invalidated. + +Whatâs your opinion? Is this a bug? Is my analyze correct? +Yes, you are right. 
We need to go through the list for each element of +the array in xen_invalidate_map_cache. Can you come up with a patch? +I spoke too soon. In the regular case there should be no locked mappings +when xen_invalidate_map_cache is called (see the DPRINTF warning at the +beginning of the functions). Without locked mappings, there should never +be more than one element in each list (see xen_map_cache_unlocked: +entry->lock == true is a necessary condition to append a new entry to +the list, otherwise it is just remapped). + +Can you confirm that what you are seeing are locked mappings +when xen_invalidate_map_cache is called? To find out, enable the DPRINTK +by turning it into a printf or by defininig MAPCACHE_DEBUG. +In fact, I think the DPRINTF above is incorrect too. In +pci_add_option_rom(), rtl8139 rom is locked mapped in +pci_add_option_rom->memory_region_get_ram_ptr (after +memory_region_init_ram). So actually I think we should remove the +DPRINTF warning as it is normal. +Let me explain why the DPRINTF warning is there: emulated dma operations +can involve locked mappings. Once a dma operation completes, the related +mapping is unlocked and can be safely destroyed. But if we destroy a +locked mapping in xen_invalidate_map_cache, while a dma is still +ongoing, QEMU will crash. We cannot handle that case. + +However, the scenario you described is different. It has nothing to do +with DMA. It looks like pci_add_option_rom calls +memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +locked mapping and it is never unlocked or destroyed. + +It looks like "ptr" is not used after pci_add_option_rom returns. Does +the append patch fix the problem you are seeing? For the proper fix, I +think we probably need some sort of memory_region_unmap wrapper or maybe +a call to address_space_unmap. +Yes, I think so, maybe this is the proper way to fix this. +Would you be up for sending a proper patch and testing it? 
We cannot call +xen_invalidate_map_cache_entry directly from pci.c though, it would need +to be one of the other functions like address_space_unmap for example. +Yes, I will look into this. +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index e6b08e1..04f98b7 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2242,6 +2242,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool +is_default_rom, + } + pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); ++ xen_invalidate_map_cache_entry(ptr); + } + static void pci_del_option_rom(PCIDevice *pdev) + +On Thu, 13 Apr 2017, Herongguang (Stephen) wrote: +> +On 2017/4/13 7:51, Stefano Stabellini wrote: +> +> On Wed, 12 Apr 2017, Herongguang (Stephen) wrote: +> +> > On 2017/4/12 6:32, Stefano Stabellini wrote: +> +> > > On Tue, 11 Apr 2017, hrg wrote: +> +> > > > On Tue, Apr 11, 2017 at 3:50 AM, Stefano Stabellini +> +> > > > <address@hidden> wrote: +> +> > > > > On Mon, 10 Apr 2017, Stefano Stabellini wrote: +> +> > > > > > On Mon, 10 Apr 2017, hrg wrote: +> +> > > > > > > On Sun, Apr 9, 2017 at 11:55 PM, hrg <address@hidden> +> +> > > > > > > wrote: +> +> > > > > > > > On Sun, Apr 9, 2017 at 11:52 PM, hrg <address@hidden> +> +> > > > > > > > wrote: +> +> > > > > > > > > Hi, +> +> > > > > > > > > +> +> > > > > > > > > In xen_map_cache_unlocked(), map to guest memory maybe in +> +> > > > > > > > > entry->next +> +> > > > > > > > > instead of first level entry (if map to rom other than guest +> +> > > > > > > > > memory +> +> > > > > > > > > comes first), while in xen_invalidate_map_cache(), when VM +> +> > > > > > > > > ballooned +> +> > > > > > > > > out memory, qemu did not invalidate cache entries in linked +> +> > > > > > > > > list(entry->next), so when VM balloon back in memory, gfns +> +> > > > > > > > > probably +> +> > > > > > > > > mapped to different mfns, thus if guest asks device to DMA +> +> > > > > > > > > to +> +> > > > > > > > > these +> +> > > > > > > > > GPA, qemu may DMA to stale MFNs. 
+> +> > > > > > > > > +> +> > > > > > > > > So I think in xen_invalidate_map_cache() linked lists should +> +> > > > > > > > > also be +> +> > > > > > > > > checked and invalidated. +> +> > > > > > > > > +> +> > > > > > > > > Whatâs your opinion? Is this a bug? Is my analyze correct? +> +> > > > > > Yes, you are right. We need to go through the list for each +> +> > > > > > element of +> +> > > > > > the array in xen_invalidate_map_cache. Can you come up with a +> +> > > > > > patch? +> +> > > > > I spoke too soon. In the regular case there should be no locked +> +> > > > > mappings +> +> > > > > when xen_invalidate_map_cache is called (see the DPRINTF warning at +> +> > > > > the +> +> > > > > beginning of the functions). Without locked mappings, there should +> +> > > > > never +> +> > > > > be more than one element in each list (see xen_map_cache_unlocked: +> +> > > > > entry->lock == true is a necessary condition to append a new entry +> +> > > > > to +> +> > > > > the list, otherwise it is just remapped). +> +> > > > > +> +> > > > > Can you confirm that what you are seeing are locked mappings +> +> > > > > when xen_invalidate_map_cache is called? To find out, enable the +> +> > > > > DPRINTK +> +> > > > > by turning it into a printf or by defininig MAPCACHE_DEBUG. +> +> > > > In fact, I think the DPRINTF above is incorrect too. In +> +> > > > pci_add_option_rom(), rtl8139 rom is locked mapped in +> +> > > > pci_add_option_rom->memory_region_get_ram_ptr (after +> +> > > > memory_region_init_ram). So actually I think we should remove the +> +> > > > DPRINTF warning as it is normal. +> +> > > Let me explain why the DPRINTF warning is there: emulated dma operations +> +> > > can involve locked mappings. Once a dma operation completes, the related +> +> > > mapping is unlocked and can be safely destroyed. But if we destroy a +> +> > > locked mapping in xen_invalidate_map_cache, while a dma is still +> +> > > ongoing, QEMU will crash. We cannot handle that case. 
+> +> > > +> +> > > However, the scenario you described is different. It has nothing to do +> +> > > with DMA. It looks like pci_add_option_rom calls +> +> > > memory_region_get_ram_ptr to map the rtl8139 rom. The mapping is a +> +> > > locked mapping and it is never unlocked or destroyed. +> +> > > +> +> > > It looks like "ptr" is not used after pci_add_option_rom returns. Does +> +> > > the append patch fix the problem you are seeing? For the proper fix, I +> +> > > think we probably need some sort of memory_region_unmap wrapper or maybe +> +> > > a call to address_space_unmap. +> +> > +> +> > Yes, I think so, maybe this is the proper way to fix this. +> +> +> +> Would you be up for sending a proper patch and testing it? We cannot call +> +> xen_invalidate_map_cache_entry directly from pci.c though, it would need +> +> to be one of the other functions like address_space_unmap for example. +> +> +> +> +> +Yes, I will look into this. +Any updates? + + +> +> > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c +> +> > > index e6b08e1..04f98b7 100644 +> +> > > --- a/hw/pci/pci.c +> +> > > +++ b/hw/pci/pci.c +> +> > > @@ -2242,6 +2242,7 @@ static void pci_add_option_rom(PCIDevice *pdev, +> +> > > bool +> +> > > is_default_rom, +> +> > > } +> +> > > pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); +> +> > > + xen_invalidate_map_cache_entry(ptr); +> +> > > } +> +> > > static void pci_del_option_rom(PCIDevice *pdev) +> + diff --git a/results/classifier/016/debug/56937788 b/results/classifier/016/debug/56937788 new file mode 100644 index 00000000..06116b76 --- /dev/null +++ b/results/classifier/016/debug/56937788 @@ -0,0 +1,371 @@ +debug: 0.971 +virtual: 0.939 +hypervisor: 0.419 +x86: 0.253 +files: 0.082 +PID: 0.062 +operating system: 0.049 +device: 0.048 +i386: 0.031 +TCG: 0.023 +user-level: 0.016 +assembly: 0.015 +kernel: 0.013 +register: 0.011 +peripherals: 0.008 +ppc: 0.007 +arm: 0.006 +VMM: 0.006 +socket: 0.006 +performance: 0.005 +architecture: 0.005 +semantic: 0.004 
+boot: 0.003 +network: 0.003 +alpha: 0.003 +KVM: 0.002 +risc-v: 0.001 +graphic: 0.001 +permissions: 0.001 +vnc: 0.001 +mistranslation: 0.000 + +[Qemu-devel] [Bug] virtio-blk: qemu will crash if hotplug virtio-blk device failed + +I found that hotplug virtio-blk device will lead to qemu crash. + +Re-production steps: + +1. Run VM named vm001 + +2. Create a virtio-blk.xml which contains wrong configurations: +<disk device="lun" rawio="yes" type="block"> + <driver cache="none" io="native" name="qemu" type="raw" /> + <source dev="/dev/mapper/11-dm" /> + <target bus="virtio" dev="vdx" /> +</disk> + +3. Run command : virsh attach-device vm001 vm001 + +Libvirt will return err msg: + +error: Failed to attach device from blk-scsi.xml + +error: internal error: unable to execute QEMU command 'device_add': Please set +scsi=off for virtio-blk devices in order to use virtio 1.0 + +it means hotplug virtio-blk device failed. + +4. Suspend or shutdown VM will leads to qemu crash + + + +from gdb: + + +(gdb) bt +#0 object_get_class (address@hidden) at qom/object.c:750 +#1 0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, +running=0, state=<optimized out>) at +/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203 +#2 0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at +vl.c:1685 +#3 0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at +/mnt/sdb/lzc/code/open/qemu/cpus.c:941 +#4 vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807 +#5 0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102 +#6 0x00007f9a7262c70a in qmp_marshal_stop (args=<optimized out>, +ret=<optimized out>, errp=0x7ffe63e255d8) at qmp-marshal.c:5854 +#7 0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, +request=0x7f9a76510120, cmds=0x7f9a72ee7980 <qmp_commands>) at +qapi/qmp-dispatch.c:104 +#8 qmp_dispatch (cmds=0x7f9a72ee7980 <qmp_commands>, address@hidden) at +qapi/qmp-dispatch.c:131 +#9 0x00007f9a725288d5 in handle_qmp_command 
(parser=<optimized out>, +tokens=<optimized out>) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852 +#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, +input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at +qobject/json-streamer.c:105 +#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', +address@hidden) at qobject/json-lexer.c:323 +#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, +buffer=<optimized out>, size=<optimized out>) at qobject/json-lexer.c:373 +#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=<optimized out>, +buffer=<optimized out>, size=<optimized out>) at qobject/json-streamer.c:124 +#14 0x00007f9a7252722e in monitor_qmp_read (opaque=<optimized out>, +buf=<optimized out>, size=<optimized out>) at +/mnt/sdb/lzc/code/open/qemu/monitor.c:3894 +#15 0x00007f9a7284ee1b in tcp_chr_read (chan=<optimized out>, cond=<optimized +out>, opaque=<optimized out>) at chardev/char-socket.c:441 +#16 0x00007f9a6e03e99a in g_main_context_dispatch () from +/usr/lib64/libglib-2.0.so.0 +#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214 +#18 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261 +#19 main_loop_wait (address@hidden) at util/main-loop.c:515 +#20 0x00007f9a724e7547 in main_loop () at vl.c:1999 +#21 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at +vl.c:4877 + +Problem happens in virtio_vmstate_change which is called by vm_state_notify, +static void virtio_vmstate_change(void *opaque, int running, RunState state) +{ + VirtIODevice *vdev = opaque; + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); + vdev->vm_running = running; + + if (backend_run) { + virtio_set_status(vdev, vdev->status); + } + + if (k->vmstate_change) { + k->vmstate_change(qbus->parent, backend_run); + } + + if (!backend_run) { + 
virtio_set_status(vdev, vdev->status); + } +} + +Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. +virtio_vmstate_change is added to the list vm_change_state_head at +virtio_blk_device_realize(virtio_init), +but after hotplug virtio-blk failed, virtio_vmstate_change will not be removed +from vm_change_state_head. + + +I apply a patch as follews: + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 5884ce3..ea532dc 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, Error +**errp) + virtio_bus_device_plugged(vdev, &err); + if (err != NULL) { + error_propagate(errp, err); ++ vdc->unrealize(dev, NULL); + return; + } + +On Tue, Oct 31, 2017 at 05:19:08AM +0000, linzhecheng wrote: +> +I found that hotplug virtio-blk device will lead to qemu crash. +The author posted a patch in a separate email thread. Please see +"[PATCH] fix: unrealize virtio device if we fail to hotplug it". + +> +Re-production steps: +> +> +1. Run VM named vm001 +> +> +2. Create a virtio-blk.xml which contains wrong configurations: +> +<disk device="lun" rawio="yes" type="block"> +> +<driver cache="none" io="native" name="qemu" type="raw" /> +> +<source dev="/dev/mapper/11-dm" /> +> +<target bus="virtio" dev="vdx" /> +> +</disk> +> +> +3. Run command : virsh attach-device vm001 vm001 +> +> +Libvirt will return err msg: +> +> +error: Failed to attach device from blk-scsi.xml +> +> +error: internal error: unable to execute QEMU command 'device_add': Please +> +set scsi=off for virtio-blk devices in order to use virtio 1.0 +> +> +it means hotplug virtio-blk device failed. +> +> +4. 
Suspend or shutdown VM will leads to qemu crash +> +> +> +> +from gdb: +> +> +> +(gdb) bt +> +#0 object_get_class (address@hidden) at qom/object.c:750 +> +#1 0x00007f9a72582e01 in virtio_vmstate_change (opaque=0x7f9a73d10960, +> +running=0, state=<optimized out>) at +> +/mnt/sdb/lzc/code/open/qemu/hw/virtio/virtio.c:2203 +> +#2 0x00007f9a7261ef52 in vm_state_notify (address@hidden, address@hidden) at +> +vl.c:1685 +> +#3 0x00007f9a7252603a in do_vm_stop (state=RUN_STATE_PAUSED) at +> +/mnt/sdb/lzc/code/open/qemu/cpus.c:941 +> +#4 vm_stop (address@hidden) at /mnt/sdb/lzc/code/open/qemu/cpus.c:1807 +> +#5 0x00007f9a7262eb1b in qmp_stop (address@hidden) at qmp.c:102 +> +#6 0x00007f9a7262c70a in qmp_marshal_stop (args=<optimized out>, +> +ret=<optimized out>, errp=0x7ffe63e255d8) at qmp-marshal.c:5854 +> +#7 0x00007f9a72897e79 in do_qmp_dispatch (errp=0x7ffe63e255d0, +> +request=0x7f9a76510120, cmds=0x7f9a72ee7980 <qmp_commands>) at +> +qapi/qmp-dispatch.c:104 +> +#8 qmp_dispatch (cmds=0x7f9a72ee7980 <qmp_commands>, address@hidden) at +> +qapi/qmp-dispatch.c:131 +> +#9 0x00007f9a725288d5 in handle_qmp_command (parser=<optimized out>, +> +tokens=<optimized out>) at /mnt/sdb/lzc/code/open/qemu/monitor.c:3852 +> +#10 0x00007f9a7289d514 in json_message_process_token (lexer=0x7f9a73ce4498, +> +input=0x7f9a73cc6880, type=JSON_RCURLY, x=36, y=17) at +> +qobject/json-streamer.c:105 +> +#11 0x00007f9a728bb69b in json_lexer_feed_char (address@hidden, ch=125 '}', +> +address@hidden) at qobject/json-lexer.c:323 +> +#12 0x00007f9a728bb75e in json_lexer_feed (lexer=0x7f9a73ce4498, +> +buffer=<optimized out>, size=<optimized out>) at qobject/json-lexer.c:373 +> +#13 0x00007f9a7289d5d9 in json_message_parser_feed (parser=<optimized out>, +> +buffer=<optimized out>, size=<optimized out>) at qobject/json-streamer.c:124 +> +#14 0x00007f9a7252722e in monitor_qmp_read (opaque=<optimized out>, +> +buf=<optimized out>, size=<optimized out>) at +> +/mnt/sdb/lzc/code/open/qemu/monitor.c:3894 
+> +#15 0x00007f9a7284ee1b in tcp_chr_read (chan=<optimized out>, cond=<optimized +> +out>, opaque=<optimized out>) at chardev/char-socket.c:441 +> +#16 0x00007f9a6e03e99a in g_main_context_dispatch () from +> +/usr/lib64/libglib-2.0.so.0 +> +#17 0x00007f9a728a342c in glib_pollfds_poll () at util/main-loop.c:214 +> +#18 os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:261 +> +#19 main_loop_wait (address@hidden) at util/main-loop.c:515 +> +#20 0x00007f9a724e7547 in main_loop () at vl.c:1999 +> +#21 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) +> +at vl.c:4877 +> +> +Problem happens in virtio_vmstate_change which is called by vm_state_notify, +> +static void virtio_vmstate_change(void *opaque, int running, RunState state) +> +{ +> +VirtIODevice *vdev = opaque; +> +BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); +> +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +> +bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK); +> +vdev->vm_running = running; +> +> +if (backend_run) { +> +virtio_set_status(vdev, vdev->status); +> +} +> +> +if (k->vmstate_change) { +> +k->vmstate_change(qbus->parent, backend_run); +> +} +> +> +if (!backend_run) { +> +virtio_set_status(vdev, vdev->status); +> +} +> +} +> +> +Vdev's parent_bus is NULL, so qdev_get_parent_bus(DEVICE(vdev)) will crash. +> +virtio_vmstate_change is added to the list vm_change_state_head at +> +virtio_blk_device_realize(virtio_init), +> +but after hotplug virtio-blk failed, virtio_vmstate_change will not be +> +removed from vm_change_state_head. 
+> +> +> +I apply a patch as follews: +> +> +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +> +index 5884ce3..ea532dc 100644 +> +--- a/hw/virtio/virtio.c +> ++++ b/hw/virtio/virtio.c +> +@@ -2491,6 +2491,7 @@ static void virtio_device_realize(DeviceState *dev, +> +Error **errp) +> +virtio_bus_device_plugged(vdev, &err); +> +if (err != NULL) { +> +error_propagate(errp, err); +> ++ vdc->unrealize(dev, NULL); +> +return; +> +} +signature.asc +Description: +PGP signature + diff --git a/results/classifier/016/debug/63565653 b/results/classifier/016/debug/63565653 new file mode 100644 index 00000000..9d4d0fd4 --- /dev/null +++ b/results/classifier/016/debug/63565653 @@ -0,0 +1,76 @@ +debug: 0.927 +virtual: 0.903 +x86: 0.153 +hypervisor: 0.100 +files: 0.064 +PID: 0.045 +kernel: 0.029 +TCG: 0.026 +assembly: 0.025 +performance: 0.024 +register: 0.019 +operating system: 0.017 +VMM: 0.015 +device: 0.012 +i386: 0.011 +architecture: 0.007 +user-level: 0.007 +boot: 0.007 +network: 0.006 +ppc: 0.004 +arm: 0.003 +semantic: 0.003 +peripherals: 0.002 +vnc: 0.002 +graphic: 0.002 +KVM: 0.001 +socket: 0.001 +alpha: 0.001 +permissions: 0.001 +risc-v: 0.001 +mistranslation: 0.001 + +[Qemu-devel] [BUG]pcibus_reset assertion failure on guest reboot + +Qemu-2.6.2 + +Start a vm with vhost-net , do reboot and hot-unplug viritio-net nic in short +time, we touch +pcibus_reset assertion failure. + +Here is qemu log: +22:29:46.359386+08:00 acpi_pm1_cnt_write -> guest do soft power off +22:29:46.785310+08:00 qemu_devices_reset +22:29:46.788093+08:00 virtio_pci_device_unplugged -> virtio net unpluged +22:29:46.803427+08:00 pcibus_reset: Assertion `bus->irq_count[i] == 0' failed. 
+ +Here is stack info: +(gdb) bt +#0 0x00007f9a336795d7 in raise () from /usr/lib64/libc.so.6 +#1 0x00007f9a3367acc8 in abort () from /usr/lib64/libc.so.6 +#2 0x00007f9a33672546 in __assert_fail_base () from /usr/lib64/libc.so.6 +#3 0x00007f9a336725f2 in __assert_fail () from /usr/lib64/libc.so.6 +#4 0x0000000000641884 in pcibus_reset (qbus=0x29eee60) at hw/pci/pci.c:283 +#5 0x00000000005bfc30 in qbus_reset_one (bus=0x29eee60, opaque=<optimized +out>) at hw/core/qdev.c:319 +#6 0x00000000005c1b19 in qdev_walk_children (dev=0x29ed2b0, pre_devfn=0x0, +pre_busfn=0x0, post_devfn=0x5c2440 ... +#7 0x00000000005c1c59 in qbus_walk_children (bus=0x2736f80, pre_devfn=0x0, +pre_busfn=0x0, post_devfn=0x5c2440 ... +#8 0x00000000005513f5 in qemu_devices_reset () at vl.c:1998 +#9 0x00000000004cab9d in pc_machine_reset () at +/home/abuild/rpmbuild/BUILD/qemu-kvm-2.6.0/hw/i386/pc.c:1976 +#10 0x000000000055148b in qemu_system_reset (address@hidden) at vl.c:2011 +#11 0x000000000055164f in main_loop_should_exit () at vl.c:2169 +#12 0x0000000000551719 in main_loop () at vl.c:2212 +#13 0x000000000041c9a8 in main (argc=<optimized out>, argv=<optimized out>, +envp=<optimized out>) at vl.c:5130 +(gdb) f 4 +... +(gdb) p bus->irq_count[0] +$6 = 1 + +Seems pci_update_irq_disabled doesn't work well + +can anyone help? 
+ diff --git a/results/classifier/016/debug/64322995 b/results/classifier/016/debug/64322995 new file mode 100644 index 00000000..08fd09d7 --- /dev/null +++ b/results/classifier/016/debug/64322995 @@ -0,0 +1,81 @@ +debug: 0.886 +performance: 0.705 +boot: 0.569 +x86: 0.245 +PID: 0.125 +i386: 0.114 +arm: 0.083 +virtual: 0.072 +files: 0.067 +TCG: 0.066 +operating system: 0.064 +device: 0.053 +alpha: 0.048 +register: 0.046 +hypervisor: 0.040 +user-level: 0.035 +ppc: 0.032 +socket: 0.031 +network: 0.031 +VMM: 0.030 +vnc: 0.029 +risc-v: 0.023 +assembly: 0.022 +peripherals: 0.021 +semantic: 0.013 +architecture: 0.011 +kernel: 0.010 +graphic: 0.008 +permissions: 0.004 +mistranslation: 0.002 +KVM: 0.001 + +[Qemu-devel] [BUG] trace: QEMU hangs on initialization with the "simple" backend + +While starting the softmmu version of QEMU, the simple backend waits for the +writeout thread to signal a condition variable when initializing the output file +path. But since the writeout thread has not been created, it just waits forever. + +Thanks, + Lluis + +On Tue, Feb 09, 2016 at 09:24:04PM +0100, LluÃs Vilanova wrote: +> +While starting the softmmu version of QEMU, the simple backend waits for the +> +writeout thread to signal a condition variable when initializing the output +> +file +> +path. But since the writeout thread has not been created, it just waits +> +forever. +Denis Lunev posted a fix: +https://patchwork.ozlabs.org/patch/580968/ +Stefan +signature.asc +Description: +PGP signature + +Stefan Hajnoczi writes: + +> +On Tue, Feb 09, 2016 at 09:24:04PM +0100, LluÃs Vilanova wrote: +> +> While starting the softmmu version of QEMU, the simple backend waits for the +> +> writeout thread to signal a condition variable when initializing the output +> +> file +> +> path. But since the writeout thread has not been created, it just waits +> +> forever. +> +Denis Lunev posted a fix: +> +https://patchwork.ozlabs.org/patch/580968/ +Great, thanks. 
+ +Lluis + diff --git a/results/classifier/016/debug/68897003 b/results/classifier/016/debug/68897003 new file mode 100644 index 00000000..c3492f86 --- /dev/null +++ b/results/classifier/016/debug/68897003 @@ -0,0 +1,743 @@ +debug: 0.863 +x86: 0.733 +virtual: 0.289 +PID: 0.235 +TCG: 0.131 +files: 0.082 +operating system: 0.079 +VMM: 0.063 +performance: 0.062 +i386: 0.055 +arm: 0.054 +device: 0.047 +ppc: 0.044 +hypervisor: 0.039 +register: 0.030 +socket: 0.020 +assembly: 0.019 +network: 0.017 +risc-v: 0.015 +kernel: 0.014 +boot: 0.013 +peripherals: 0.012 +vnc: 0.012 +architecture: 0.011 +semantic: 0.011 +user-level: 0.006 +alpha: 0.004 +permissions: 0.003 +graphic: 0.002 +KVM: 0.001 +mistranslation: 0.001 + +[Qemu-devel] [BUG] VM abort after migration + +Hi guys, + +We found a qemu core in our testing environment, the assertion +'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +the bus->irq_count[i] is '-1'. + +Through analysis, it was happened after VM migration and we think +it was caused by the following sequence: + +*Migration Source* +1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +2. save E1000: + e1000_pre_save + e1000_mit_timer + set_interrupt_cause + pci_set_irq --> update pci_dev->irq_state to 1 and + update bus->irq_count[x] to 1 ( new ) + the irq_state sent to dest. + +*Migration Dest* +1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is 1. +2. If the e1000 need change irqline , it would call to pci_irq_handler(), + the irq_state maybe change to 0 and bus->irq_count[x] will become + -1 in this situation. +3. do VM reboot then the assertion will be triggered. + +We also found some guys faced the similar problem: +[1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +[2] +https://bugs.launchpad.net/qemu/+bug/1702621 +Is there some patches to fix this problem ? +Can we save pcibus state after all the pci devs are saved ? 
+ +Thanks, +Longpeng(Mike) + +* longpeng (address@hidden) wrote: +> +Hi guys, +> +> +We found a qemu core in our testing environment, the assertion +> +'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +> +the bus->irq_count[i] is '-1'. +> +> +Through analysis, it was happened after VM migration and we think +> +it was caused by the following sequence: +> +> +*Migration Source* +> +1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +> +2. save E1000: +> +e1000_pre_save +> +e1000_mit_timer +> +set_interrupt_cause +> +pci_set_irq --> update pci_dev->irq_state to 1 and +> +update bus->irq_count[x] to 1 ( new ) +> +the irq_state sent to dest. +> +> +*Migration Dest* +> +1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is 1. +> +2. If the e1000 need change irqline , it would call to pci_irq_handler(), +> +the irq_state maybe change to 0 and bus->irq_count[x] will become +> +-1 in this situation. +> +3. do VM reboot then the assertion will be triggered. +> +> +We also found some guys faced the similar problem: +> +[1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +> +[2] +https://bugs.launchpad.net/qemu/+bug/1702621 +> +> +Is there some patches to fix this problem ? +I don't remember any. + +> +Can we save pcibus state after all the pci devs are saved ? +Does this problem only happen with e1000? I think so. +If it's only e1000 I think we should fix it - I think once the VM is +stopped for doing the device migration it shouldn't be raising +interrupts. + +Dave + +> +Thanks, +> +Longpeng(Mike) +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On 2019/7/8 ä¸å5:47, Dr. David Alan Gilbert wrote: +* longpeng (address@hidden) wrote: +Hi guys, + +We found a qemu core in our testing environment, the assertion +'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +the bus->irq_count[i] is '-1'. 
+ +Through analysis, it was happened after VM migration and we think +it was caused by the following sequence: + +*Migration Source* +1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +2. save E1000: + e1000_pre_save + e1000_mit_timer + set_interrupt_cause + pci_set_irq --> update pci_dev->irq_state to 1 and + update bus->irq_count[x] to 1 ( new ) + the irq_state sent to dest. + +*Migration Dest* +1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is 1. +2. If the e1000 need change irqline , it would call to pci_irq_handler(), + the irq_state maybe change to 0 and bus->irq_count[x] will become + -1 in this situation. +3. do VM reboot then the assertion will be triggered. + +We also found some guys faced the similar problem: +[1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +[2] +https://bugs.launchpad.net/qemu/+bug/1702621 +Is there some patches to fix this problem ? +I don't remember any. +Can we save pcibus state after all the pci devs are saved ? +Does this problem only happen with e1000? I think so. +If it's only e1000 I think we should fix it - I think once the VM is +stopped for doing the device migration it shouldn't be raising +interrupts. +I wonder maybe we can simply fix this by no setting ICS on pre_save() +but scheduling mit timer unconditionally in post_load(). +Thanks +Dave +Thanks, +Longpeng(Mike) +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +å¨ 2019/7/10 11:25, Jason Wang åé: +> +> +On 2019/7/8 ä¸å5:47, Dr. David Alan Gilbert wrote: +> +> * longpeng (address@hidden) wrote: +> +>> Hi guys, +> +>> +> +>> We found a qemu core in our testing environment, the assertion +> +>> 'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +> +>> the bus->irq_count[i] is '-1'. +> +>> +> +>> Through analysis, it was happened after VM migration and we think +> +>> it was caused by the following sequence: +> +>> +> +>> *Migration Source* +> +>> 1. 
save bus pci.0 state, including irq_count[x] ( =0 , old ) +> +>> 2. save E1000: +> +>>    e1000_pre_save +> +>>     e1000_mit_timer +> +>>      set_interrupt_cause +> +>>       pci_set_irq --> update pci_dev->irq_state to 1 and +> +>>                   update bus->irq_count[x] to 1 ( new ) +> +>>     the irq_state sent to dest. +> +>> +> +>> *Migration Dest* +> +>> 1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is 1. +> +>> 2. If the e1000 need change irqline , it would call to pci_irq_handler(), +> +>>   the irq_state maybe change to 0 and bus->irq_count[x] will become +> +>>   -1 in this situation. +> +>> 3. do VM reboot then the assertion will be triggered. +> +>> +> +>> We also found some guys faced the similar problem: +> +>> [1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +> +>> [2] +https://bugs.launchpad.net/qemu/+bug/1702621 +> +>> +> +>> Is there some patches to fix this problem ? +> +> I don't remember any. +> +> +> +>> Can we save pcibus state after all the pci devs are saved ? +> +> Does this problem only happen with e1000? I think so. +> +> If it's only e1000 I think we should fix it - I think once the VM is +> +> stopped for doing the device migration it shouldn't be raising +> +> interrupts. +> +> +> +I wonder maybe we can simply fix this by no setting ICS on pre_save() but +> +scheduling mit timer unconditionally in post_load(). +> +I also think this is a bug of e1000 because we find more cores with the same +frame thease days. + +I'm not familiar with e1000 so hope someone could fix it, thanks. :) + +> +Thanks +> +> +> +> +> +> Dave +> +> +> +>> Thanks, +> +>> Longpeng(Mike) +> +> -- +> +> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +> +. +> +-- +Regards, +Longpeng(Mike) + +On 2019/7/10 ä¸å11:36, Longpeng (Mike) wrote: +å¨ 2019/7/10 11:25, Jason Wang åé: +On 2019/7/8 ä¸å5:47, Dr. 
David Alan Gilbert wrote: +* longpeng (address@hidden) wrote: +Hi guys, + +We found a qemu core in our testing environment, the assertion +'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +the bus->irq_count[i] is '-1'. + +Through analysis, it was happened after VM migration and we think +it was caused by the following sequence: + +*Migration Source* +1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +2. save E1000: +    e1000_pre_save +     e1000_mit_timer +      set_interrupt_cause +       pci_set_irq --> update pci_dev->irq_state to 1 and +                   update bus->irq_count[x] to 1 ( new ) +     the irq_state sent to dest. + +*Migration Dest* +1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is 1. +2. If the e1000 need change irqline , it would call to pci_irq_handler(), +   the irq_state maybe change to 0 and bus->irq_count[x] will become +   -1 in this situation. +3. do VM reboot then the assertion will be triggered. + +We also found some guys faced the similar problem: +[1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +[2] +https://bugs.launchpad.net/qemu/+bug/1702621 +Is there some patches to fix this problem ? +I don't remember any. +Can we save pcibus state after all the pci devs are saved ? +Does this problem only happen with e1000? I think so. +If it's only e1000 I think we should fix it - I think once the VM is +stopped for doing the device migration it shouldn't be raising +interrupts. +I wonder maybe we can simply fix this by no setting ICS on pre_save() but +scheduling mit timer unconditionally in post_load(). +I also think this is a bug of e1000 because we find more cores with the same +frame thease days. + +I'm not familiar with e1000 so hope someone could fix it, thanks. :) +Draft a path in attachment, please test. + +Thanks +Thanks +Dave +Thanks, +Longpeng(Mike) +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK +. 
+0001-e1000-don-t-raise-interrupt-in-pre_save.patch +Description: +Text Data + +å¨ 2019/7/10 11:57, Jason Wang åé: +> +> +On 2019/7/10 ä¸å11:36, Longpeng (Mike) wrote: +> +> å¨ 2019/7/10 11:25, Jason Wang åé: +> +>> On 2019/7/8 ä¸å5:47, Dr. David Alan Gilbert wrote: +> +>>> * longpeng (address@hidden) wrote: +> +>>>> Hi guys, +> +>>>> +> +>>>> We found a qemu core in our testing environment, the assertion +> +>>>> 'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +> +>>>> the bus->irq_count[i] is '-1'. +> +>>>> +> +>>>> Through analysis, it was happened after VM migration and we think +> +>>>> it was caused by the following sequence: +> +>>>> +> +>>>> *Migration Source* +> +>>>> 1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +> +>>>> 2. save E1000: +> +>>>>     e1000_pre_save +> +>>>>      e1000_mit_timer +> +>>>>       set_interrupt_cause +> +>>>>        pci_set_irq --> update pci_dev->irq_state to 1 and +> +>>>>                    update bus->irq_count[x] to 1 ( new ) +> +>>>>      the irq_state sent to dest. +> +>>>> +> +>>>> *Migration Dest* +> +>>>> 1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is +> +>>>> 1. +> +>>>> 2. If the e1000 need change irqline , it would call to pci_irq_handler(), +> +>>>>    the irq_state maybe change to 0 and bus->irq_count[x] will become +> +>>>>    -1 in this situation. +> +>>>> 3. do VM reboot then the assertion will be triggered. +> +>>>> +> +>>>> We also found some guys faced the similar problem: +> +>>>> [1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +> +>>>> [2] +https://bugs.launchpad.net/qemu/+bug/1702621 +> +>>>> +> +>>>> Is there some patches to fix this problem ? +> +>>> I don't remember any. +> +>>> +> +>>>> Can we save pcibus state after all the pci devs are saved ? +> +>>> Does this problem only happen with e1000? I think so. 
+> +>>> If it's only e1000 I think we should fix it - I think once the VM is +> +>>> stopped for doing the device migration it shouldn't be raising +> +>>> interrupts. +> +>> +> +>> I wonder maybe we can simply fix this by no setting ICS on pre_save() but +> +>> scheduling mit timer unconditionally in post_load(). +> +>> +> +> I also think this is a bug of e1000 because we find more cores with the same +> +> frame thease days. +> +> +> +> I'm not familiar with e1000 so hope someone could fix it, thanks. :) +> +> +> +> +Draft a path in attachment, please test. +> +Thanks. We'll test it for a few weeks and then give you the feedback. :) + +> +Thanks +> +> +> +>> Thanks +> +>> +> +>> +> +>>> Dave +> +>>> +> +>>>> Thanks, +> +>>>> Longpeng(Mike) +> +>>> -- +> +>>> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +>> . +> +>> +-- +Regards, +Longpeng(Mike) + +å¨ 2019/7/10 11:57, Jason Wang åé: +> +> +On 2019/7/10 ä¸å11:36, Longpeng (Mike) wrote: +> +> å¨ 2019/7/10 11:25, Jason Wang åé: +> +>> On 2019/7/8 ä¸å5:47, Dr. David Alan Gilbert wrote: +> +>>> * longpeng (address@hidden) wrote: +> +>>>> Hi guys, +> +>>>> +> +>>>> We found a qemu core in our testing environment, the assertion +> +>>>> 'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +> +>>>> the bus->irq_count[i] is '-1'. +> +>>>> +> +>>>> Through analysis, it was happened after VM migration and we think +> +>>>> it was caused by the following sequence: +> +>>>> +> +>>>> *Migration Source* +> +>>>> 1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +> +>>>> 2. save E1000: +> +>>>>     e1000_pre_save +> +>>>>      e1000_mit_timer +> +>>>>       set_interrupt_cause +> +>>>>        pci_set_irq --> update pci_dev->irq_state to 1 and +> +>>>>                    update bus->irq_count[x] to 1 ( new ) +> +>>>>      the irq_state sent to dest. +> +>>>> +> +>>>> *Migration Dest* +> +>>>> 1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is +> +>>>> 1. +> +>>>> 2. 
If the e1000 need change irqline , it would call to pci_irq_handler(), +> +>>>>    the irq_state maybe change to 0 and bus->irq_count[x] will become +> +>>>>    -1 in this situation. +> +>>>> 3. do VM reboot then the assertion will be triggered. +> +>>>> +> +>>>> We also found some guys faced the similar problem: +> +>>>> [1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +> +>>>> [2] +https://bugs.launchpad.net/qemu/+bug/1702621 +> +>>>> +> +>>>> Is there some patches to fix this problem ? +> +>>> I don't remember any. +> +>>> +> +>>>> Can we save pcibus state after all the pci devs are saved ? +> +>>> Does this problem only happen with e1000? I think so. +> +>>> If it's only e1000 I think we should fix it - I think once the VM is +> +>>> stopped for doing the device migration it shouldn't be raising +> +>>> interrupts. +> +>> +> +>> I wonder maybe we can simply fix this by no setting ICS on pre_save() but +> +>> scheduling mit timer unconditionally in post_load(). +> +>> +> +> I also think this is a bug of e1000 because we find more cores with the same +> +> frame thease days. +> +> +> +> I'm not familiar with e1000 so hope someone could fix it, thanks. :) +> +> +> +> +Draft a path in attachment, please test. +> +Hi Jason, + +We've tested the patch for about two weeks, everything went well, thanks! + +Feel free to add my: +Reported-and-tested-by: Longpeng <address@hidden> + +> +Thanks +> +> +> +>> Thanks +> +>> +> +>> +> +>>> Dave +> +>>> +> +>>>> Thanks, +> +>>>> Longpeng(Mike) +> +>>> -- +> +>>> Dr. David Alan Gilbert / address@hidden / Manchester, UK +> +>> . +> +>> +-- +Regards, +Longpeng(Mike) + +On 2019/7/27 ä¸å2:10, Longpeng (Mike) wrote: +å¨ 2019/7/10 11:57, Jason Wang åé: +On 2019/7/10 ä¸å11:36, Longpeng (Mike) wrote: +å¨ 2019/7/10 11:25, Jason Wang åé: +On 2019/7/8 ä¸å5:47, Dr. 
David Alan Gilbert wrote: +* longpeng (address@hidden) wrote: +Hi guys, + +We found a qemu core in our testing environment, the assertion +'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and +the bus->irq_count[i] is '-1'. + +Through analysis, it was happened after VM migration and we think +it was caused by the following sequence: + +*Migration Source* +1. save bus pci.0 state, including irq_count[x] ( =0 , old ) +2. save E1000: +     e1000_pre_save +      e1000_mit_timer +       set_interrupt_cause +        pci_set_irq --> update pci_dev->irq_state to 1 and +                    update bus->irq_count[x] to 1 ( new ) +      the irq_state sent to dest. + +*Migration Dest* +1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is 1. +2. If the e1000 need change irqline , it would call to pci_irq_handler(), +    the irq_state maybe change to 0 and bus->irq_count[x] will become +    -1 in this situation. +3. do VM reboot then the assertion will be triggered. + +We also found some guys faced the similar problem: +[1] +https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html +[2] +https://bugs.launchpad.net/qemu/+bug/1702621 +Is there some patches to fix this problem ? +I don't remember any. +Can we save pcibus state after all the pci devs are saved ? +Does this problem only happen with e1000? I think so. +If it's only e1000 I think we should fix it - I think once the VM is +stopped for doing the device migration it shouldn't be raising +interrupts. +I wonder maybe we can simply fix this by no setting ICS on pre_save() but +scheduling mit timer unconditionally in post_load(). +I also think this is a bug of e1000 because we find more cores with the same +frame thease days. + +I'm not familiar with e1000 so hope someone could fix it, thanks. :) +Draft a path in attachment, please test. +Hi Jason, + +We've tested the patch for about two weeks, everything went well, thanks! 
+ +Feel free to add my: +Reported-and-tested-by: Longpeng <address@hidden> +Applied. + +Thanks +Thanks +Thanks +Dave +Thanks, +Longpeng(Mike) +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK +. + diff --git a/results/classifier/016/debug/71456293 b/results/classifier/016/debug/71456293 new file mode 100644 index 00000000..6797674a --- /dev/null +++ b/results/classifier/016/debug/71456293 @@ -0,0 +1,1513 @@ +debug: 0.853 +virtual: 0.677 +hypervisor: 0.392 +files: 0.173 +operating system: 0.120 +PID: 0.059 +x86: 0.058 +register: 0.029 +TCG: 0.028 +KVM: 0.025 +kernel: 0.020 +performance: 0.019 +VMM: 0.011 +device: 0.010 +user-level: 0.007 +assembly: 0.007 +ppc: 0.006 +i386: 0.006 +arm: 0.003 +semantic: 0.003 +architecture: 0.002 +network: 0.002 +vnc: 0.002 +boot: 0.002 +socket: 0.002 +graphic: 0.001 +peripherals: 0.001 +alpha: 0.001 +risc-v: 0.001 +permissions: 0.001 +mistranslation: 0.000 + +[Qemu-devel][bug] qemu crash when migrate vm and vm's disks + +When migrate vm and vmâs disks target host qemu crash due to an invalid free. +#0 object_unref (obj=0x1000) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 +#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>) +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 +#2 flatview_destroy (view=0x560439653880) at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 +#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>) +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 +#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 +#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 +test base qemu-2.12.0 +ï¼ +but use lastest qemu(v6.0.0-rc2) also reproduce. 
+As follow patch can resolve this problem: +https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html +Steps to reproduce: +(1) Create VM (virsh define) +(2) Add 64 virtio scsi disks +(3) migrate vm and vmâdisks +------------------------------------------------------------------------------------------------------------------------------------- +æ¬é®ä»¶åå ¶é件嫿æ°åä¸éå¢çä¿å¯ä¿¡æ¯ï¼ä» éäºåéç»ä¸é¢å°åä¸ååº +ç个人æç¾¤ç»ãç¦æ¢ä»»ä½å ¶ä»äººä»¥ä»»ä½å½¢å¼ä½¿ç¨ï¼å æ¬ä½ä¸éäºå ¨é¨æé¨åå°æ³é²ãå¤å¶ã +ææ£åï¼æ¬é®ä»¶ä¸çä¿¡æ¯ã妿æ¨éæ¶äºæ¬é®ä»¶ï¼è¯·æ¨ç«å³çµè¯æé®ä»¶éç¥å件人并å 餿¬ +é®ä»¶ï¼ +This e-mail and its attachments contain confidential information from New H3C, which is +intended only for the person or entity whose address is listed above. Any use of the +information contained herein in any way (including, but not limited to, total or partial +disclosure, reproduction, or dissemination) by persons other than the intended +recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender +by phone or email immediately and delete it! + +* Yuchen (yu.chen@h3c.com) wrote: +> +When migrate vm and vmâs disks target host qemu crash due to an invalid free. +> +> +#0 object_unref (obj=0x1000) at +> +/qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 +> +#1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>) +> +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 +> +#2 flatview_destroy (view=0x560439653880) at +> +/qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 +> +#3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>) +> +at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 +> +#4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 +> +#5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 +> +> +test base qemu-2.12.0ï¼but use lastest qemu(v6.0.0-rc2) also reproduce. +Interesting. 
+ +> +As follow patch can resolve this problem: +> +https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html +That's a pci/rcu change; ccing Paolo and Micahel. + +> +Steps to reproduce: +> +(1) Create VM (virsh define) +> +(2) Add 64 virtio scsi disks +Is that hot adding the disks later, or are they included in the VM at +creation? +Can you provide a libvirt XML example? + +> +(3) migrate vm and vmâdisks +What do you mean by 'and vm disks' - are you doing a block migration? + +Dave + +> +------------------------------------------------------------------------------------------------------------------------------------- +> +æ¬é®ä»¶åå ¶é件嫿æ°åä¸éå¢çä¿å¯ä¿¡æ¯ï¼ä» éäºåéç»ä¸é¢å°åä¸ååº +> +ç个人æç¾¤ç»ãç¦æ¢ä»»ä½å ¶ä»äººä»¥ä»»ä½å½¢å¼ä½¿ç¨ï¼å æ¬ä½ä¸éäºå ¨é¨æé¨åå°æ³é²ãå¤å¶ã +> +ææ£åï¼æ¬é®ä»¶ä¸çä¿¡æ¯ã妿æ¨éæ¶äºæ¬é®ä»¶ï¼è¯·æ¨ç«å³çµè¯æé®ä»¶éç¥å件人并å 餿¬ +> +é®ä»¶ï¼ +> +This e-mail and its attachments contain confidential information from New +> +H3C, which is +> +intended only for the person or entity whose address is listed above. Any use +> +of the +> +information contained herein in any way (including, but not limited to, total +> +or partial +> +disclosure, reproduction, or dissemination) by persons other than the intended +> +recipient(s) is prohibited. If you receive this e-mail in error, please +> +notify the sender +> +by phone or email immediately and delete it! +-- +Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK + +> +-----é®ä»¶åä»¶----- +> +å件人: Dr. David Alan Gilbert [ +mailto:dgilbert@redhat.com +] +> +åéæ¶é´: 2021å¹´4æ8æ¥ 19:27 +> +æ¶ä»¶äºº: yuchen (Cloud) <yu.chen@h3c.com>; pbonzini@redhat.com; +> +mst@redhat.com +> +æé: qemu-devel@nongnu.org +> +主é¢: Re: [Qemu-devel][bug] qemu crash when migrate vm and vm's disks +> +> +* Yuchen (yu.chen@h3c.com) wrote: +> +> When migrate vm and vmâs disks target host qemu crash due to an invalid +> +free. 
+> +> +> +> #0 object_unref (obj=0x1000) at +> +> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/qom/object.c:920 +> +> #1 0x0000560434d79e79 in memory_region_unref (mr=<optimized out>) +> +> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:1730 +> +> #2 flatview_destroy (view=0x560439653880) at +> +> /qemu-2.12/rpmbuild/BUILD/qemu-2.12/memory.c:292 +> +> #3 0x000056043514dfbe in call_rcu_thread (opaque=<optimized out>) +> +> at /qemu-2.12/rpmbuild/BUILD/qemu-2.12/util/rcu.c:284 +> +> #4 0x00007fbc2b36fe25 in start_thread () from /lib64/libpthread.so.0 +> +> #5 0x00007fbc2b099bad in clone () from /lib64/libc.so.6 +> +> +> +> test base qemu-2.12.0ï¼but use lastest qemu(v6.0.0-rc2) also reproduce. +> +> +Interesting. +> +> +> As follow patch can resolve this problem: +> +> +https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg02272.html +> +> +That's a pci/rcu change; ccing Paolo and Micahel. +> +> +> Steps to reproduce: +> +> (1) Create VM (virsh define) +> +> (2) Add 64 virtio scsi disks +> +> +Is that hot adding the disks later, or are they included in the VM at +> +creation? +> +Can you provide a libvirt XML example? 
+> +Include disks in the VM at creation + +vm disks xml (only virtio scsi disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='sda' bus='scsi'/> + <address type='drive' controller='2' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='sdb' bus='scsi'/> + <address type='drive' controller='3' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='sdc' bus='scsi'/> + <address type='drive' controller='4' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='sdd' bus='scsi'/> + <address type='drive' controller='5' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='sde' bus='scsi'/> + <address type='drive' controller='6' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='sdf' bus='scsi'/> + <address type='drive' controller='7' bus='0' target='0' unit='0'/> + </disk> + <disk 
type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='sdg' bus='scsi'/> + <address type='drive' controller='8' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='sdh' bus='scsi'/> + <address type='drive' controller='9' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='sdi' bus='scsi'/> + <address type='drive' controller='10' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='sdj' bus='scsi'/> + <address type='drive' controller='11' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='sdk' bus='scsi'/> + <address type='drive' controller='12' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='sdl' bus='scsi'/> + <address type='drive' controller='13' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='sdm' bus='scsi'/> + <address type='drive' controller='14' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' 
+discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='sdn' bus='scsi'/> + <address type='drive' controller='15' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='sdo' bus='scsi'/> + <address type='drive' controller='16' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='sdp' bus='scsi'/> + <address type='drive' controller='17' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='sdq' bus='scsi'/> + <address type='drive' controller='18' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='sdr' bus='scsi'/> + <address type='drive' controller='19' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='sds' bus='scsi'/> + <address type='drive' controller='20' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='sdt' bus='scsi'/> + <address type='drive' controller='21' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='sdu' 
bus='scsi'/> + <address type='drive' controller='22' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='sdv' bus='scsi'/> + <address type='drive' controller='23' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='sdw' bus='scsi'/> + <address type='drive' controller='24' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='sdx' bus='scsi'/> + <address type='drive' controller='25' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='sdy' bus='scsi'/> + <address type='drive' controller='26' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='sdz' bus='scsi'/> + <address type='drive' controller='27' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='sdaa' bus='scsi'/> + <address type='drive' controller='28' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='sdab' bus='scsi'/> + <address type='drive' controller='29' bus='0' target='0' unit='0'/> + 
</disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='sdac' bus='scsi'/> + <address type='drive' controller='30' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='sdad' bus='scsi'/> + <address type='drive' controller='31' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='sdae' bus='scsi'/> + <address type='drive' controller='32' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='sdaf' bus='scsi'/> + <address type='drive' controller='33' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='sdag' bus='scsi'/> + <address type='drive' controller='34' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='sdah' bus='scsi'/> + <address type='drive' controller='35' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='sdai' bus='scsi'/> + <address type='drive' controller='36' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' 
cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='sdaj' bus='scsi'/> + <address type='drive' controller='37' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='sdak' bus='scsi'/> + <address type='drive' controller='38' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='sdal' bus='scsi'/> + <address type='drive' controller='39' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='sdam' bus='scsi'/> + <address type='drive' controller='40' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='sdan' bus='scsi'/> + <address type='drive' controller='41' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='sdao' bus='scsi'/> + <address type='drive' controller='42' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='sdap' bus='scsi'/> + <address type='drive' controller='43' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source 
file='/vms/tempp/vm-data43'/> + <target dev='sdaq' bus='scsi'/> + <address type='drive' controller='44' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='sdar' bus='scsi'/> + <address type='drive' controller='45' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='sdas' bus='scsi'/> + <address type='drive' controller='46' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='sdat' bus='scsi'/> + <address type='drive' controller='47' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='sdau' bus='scsi'/> + <address type='drive' controller='48' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='sdav' bus='scsi'/> + <address type='drive' controller='49' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='sdaw' bus='scsi'/> + <address type='drive' controller='50' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='sdax' bus='scsi'/> + <address 
type='drive' controller='51' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='sday' bus='scsi'/> + <address type='drive' controller='52' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='sdaz' bus='scsi'/> + <address type='drive' controller='53' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='sdba' bus='scsi'/> + <address type='drive' controller='54' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='sdbb' bus='scsi'/> + <address type='drive' controller='55' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='sdbc' bus='scsi'/> + <address type='drive' controller='56' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='sdbd' bus='scsi'/> + <address type='drive' controller='57' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='sdbe' bus='scsi'/> + <address type='drive' controller='58' bus='0' target='0' unit='0'/> + </disk> + <disk 
type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='sdbf' bus='scsi'/> + <address type='drive' controller='59' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='sdbg' bus='scsi'/> + <address type='drive' controller='60' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='sdbh' bus='scsi'/> + <address type='drive' controller='61' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='sdbi' bus='scsi'/> + <address type='drive' controller='62' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='sdbj' bus='scsi'/> + <address type='drive' controller='63' bus='0' target='0' unit='0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='sdbk' bus='scsi'/> + <address type='drive' controller='64' bus='0' target='0' unit='0'/> + </disk> + <controller type='scsi' index='0'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='1' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='2' model='virtio-scsi'> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='3' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='4' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='5' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='6' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='7' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='8' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='9' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='10' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='11' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='12' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='13' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='14' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='15' model='virtio-scsi'> + <address type='pci' domain='0x0000' 
bus='0x01' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='16' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='17' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='18' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='19' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='20' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='21' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='22' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='23' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='24' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='25' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='26' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='27' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='28' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' 
slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='29' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </controller> + <controller type='scsi' index='30' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </controller> + <controller type='scsi' index='31' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </controller> + <controller type='scsi' index='32' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </controller> + <controller type='scsi' index='33' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='34' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </controller> + <controller type='scsi' index='35' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </controller> + <controller type='scsi' index='36' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </controller> + <controller type='scsi' index='37' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </controller> + <controller type='scsi' index='38' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </controller> + <controller type='scsi' index='39' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='40' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </controller> + <controller type='scsi' index='41' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' 
slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='42' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='43' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='44' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' +function='0x0'/> + </controller> + <controller type='scsi' index='45' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </controller> + <controller type='scsi' index='46' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </controller> + <controller type='scsi' index='47' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0c' +function='0x0'/> + </controller> + <controller type='scsi' index='48' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </controller> + <controller type='scsi' index='49' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </controller> + <controller type='scsi' index='50' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </controller> + <controller type='scsi' index='51' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </controller> + <controller type='scsi' index='52' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </controller> + <controller type='scsi' index='53' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </controller> + <controller type='scsi' index='54' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' 
slot='0x13' +function='0x0'/> + </controller> + <controller type='scsi' index='55' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </controller> + <controller type='scsi' index='56' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x15' +function='0x0'/> + </controller> + <controller type='scsi' index='57' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </controller> + <controller type='scsi' index='58' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </controller> + <controller type='scsi' index='59' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </controller> + <controller type='scsi' index='60' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </controller> + <controller type='scsi' index='61' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </controller> + <controller type='scsi' index='62' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </controller> + <controller type='scsi' index='63' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </controller> + <controller type='scsi' index='64' model='virtio-scsi'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <controller type='pci' index='1' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' +function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' 
slot='0x1f' +function='0x0'/> + </controller> + </devices> + +vm disks xml (only virtio disks): + <devices> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native'/> + <source file='/vms/tempp/vm-os'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data2'/> + <target dev='vdb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data3'/> + <target dev='vdc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data4'/> + <target dev='vdd' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data5'/> + <target dev='vde' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data6'/> + <target dev='vdf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data7'/> + <target dev='vdg' bus='virtio'/> + <address 
type='pci' domain='0x0000' bus='0x00' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data8'/> + <target dev='vdh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data9'/> + <target dev='vdi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data10'/> + <target dev='vdj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data11'/> + <target dev='vdk' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data12'/> + <target dev='vdl' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data13'/> + <target dev='vdm' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data14'/> + <target dev='vdn' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x00' slot='0x15' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data15'/> + <target dev='vdo' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data16'/> + <target dev='vdp' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data17'/> + <target dev='vdq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data18'/> + <target dev='vdr' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data19'/> + <target dev='vds' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data20'/> + <target dev='vdt' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data21'/> + <target dev='vdu' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x00' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data22'/> + <target dev='vdv' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data23'/> + <target dev='vdw' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data24'/> + <target dev='vdx' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data25'/> + <target dev='vdy' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data26'/> + <target dev='vdz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data27'/> + <target dev='vdaa' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data28'/> + <target dev='vdab' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data29'/> + <target dev='vdac' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data30'/> + <target dev='vdad' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data31'/> + <target dev='vdae' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data32'/> + <target dev='vdaf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data33'/> + <target dev='vdag' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data34'/> + <target dev='vdah' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data35'/> + <target dev='vdai' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x0d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data36'/> + <target dev='vdaj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data37'/> + <target dev='vdak' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x0f' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data38'/> + <target dev='vdal' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x10' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data39'/> + <target dev='vdam' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x11' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data40'/> + <target dev='vdan' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x12' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data41'/> + <target dev='vdao' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x13' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data42'/> + <target dev='vdap' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x14' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data43'/> + <target dev='vdaq' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x15' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data44'/> + <target dev='vdar' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x16' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data45'/> + <target dev='vdas' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x17' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data46'/> + <target dev='vdat' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x18' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data47'/> + <target dev='vdau' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x19' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data48'/> + <target dev='vdav' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data49'/> + <target dev='vdaw' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x01' slot='0x1b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data50'/> + <target dev='vdax' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1c' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data51'/> + <target dev='vday' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1d' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data52'/> + <target dev='vdaz' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1e' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data53'/> + <target dev='vdba' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x01' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data54'/> + <target dev='vdbb' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x02' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data55'/> + <target dev='vdbc' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x03' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data56'/> + <target dev='vdbd' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x02' slot='0x04' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data57'/> + <target dev='vdbe' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x05' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data58'/> + <target dev='vdbf' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x06' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data59'/> + <target dev='vdbg' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x07' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data60'/> + <target dev='vdbh' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x08' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data61'/> + <target dev='vdbi' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x09' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data62'/> + <target dev='vdbj' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x02' slot='0x0a' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data63'/> + <target dev='vdbk' bus='virtio'/> + <address type='pci' 
domain='0x0000' bus='0x02' slot='0x0b' +function='0x0'/> + </disk> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' cache='directsync' io='native' +discard='unmap'/> + <source file='/vms/tempp/vm-data1'/> + <target dev='vdbl' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' +function='0x0'/> + </disk> + <controller type='pci' index='0' model='pci-root'/> + <controller type='pci' index='1' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='1'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' +function='0x0'/> + </controller> + <controller type='pci' index='2' model='pci-bridge'> + <model name='pci-bridge'/> + <target chassisNr='2'/> + <address type='pci' domain='0x0000' bus='0x01' slot='0x1f' +function='0x0'/> + </controller> + </devices> + +> +> (3) migrate vm and vmâdisks +> +> +What do you mean by 'and vm disks' - are you doing a block migration? +> +Yes, block migration. +In fact, only migration domain also reproduced. + +> +Dave +> +> +> ---------------------------------------------------------------------- +> +> --------------------------------------------------------------- +> +Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK +------------------------------------------------------------------------------------------------------------------------------------- +æ¬é®ä»¶åå ¶é件嫿æ°åä¸éå¢çä¿å¯ä¿¡æ¯ï¼ä» éäºåéç»ä¸é¢å°åä¸ååº +ç个人æç¾¤ç»ãç¦æ¢ä»»ä½å ¶ä»äººä»¥ä»»ä½å½¢å¼ä½¿ç¨ï¼å æ¬ä½ä¸éäºå ¨é¨æé¨åå°æ³é²ãå¤å¶ã +ææ£åï¼æ¬é®ä»¶ä¸çä¿¡æ¯ã妿æ¨éæ¶äºæ¬é®ä»¶ï¼è¯·æ¨ç«å³çµè¯æé®ä»¶éç¥å件人并å 餿¬ +é®ä»¶ï¼ +This e-mail and its attachments contain confidential information from New H3C, +which is +intended only for the person or entity whose address is listed above. Any use +of the +information contained herein in any way (including, but not limited to, total +or partial +disclosure, reproduction, or dissemination) by persons other than the intended +recipient(s) is prohibited. 
If you receive this e-mail in error, please notify +the sender +by phone or email immediately and delete it! + diff --git a/results/classifier/016/debug/73660729 b/results/classifier/016/debug/73660729 new file mode 100644 index 00000000..257d78b3 --- /dev/null +++ b/results/classifier/016/debug/73660729 @@ -0,0 +1,58 @@ +arm: 0.988 +debug: 0.925 +operating system: 0.895 +kernel: 0.875 +hypervisor: 0.821 +user-level: 0.793 +virtual: 0.156 +files: 0.096 +architecture: 0.062 +device: 0.035 +VMM: 0.033 +TCG: 0.028 +network: 0.013 +PID: 0.009 +register: 0.009 +performance: 0.006 +socket: 0.005 +alpha: 0.004 +risc-v: 0.003 +semantic: 0.003 +ppc: 0.002 +assembly: 0.002 +peripherals: 0.002 +KVM: 0.001 +boot: 0.001 +graphic: 0.001 +permissions: 0.001 +vnc: 0.001 +x86: 0.001 +i386: 0.000 +mistranslation: 0.000 + +[BUG]The latest qemu crashed when I tested cxl + +I test cxl with the patch:[v11,0/2] arm/virt: + CXL support via pxb_cxl. +https://patchwork.kernel.org/project/cxl/cover/20220616141950.23374-1-Jonathan.Cameron@huawei.com/ +But the qemu crashed,and showing an error: +qemu-system-aarch64: ../hw/arm/virt.c:1735: virt_get_high_memmap_enabled: + Assertion `ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == ARRAY_SIZE(enabled_array)' failed. +Then I modify the patch to fix the bug: +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ea2413a0ba..3d4cee3491 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1710,6 +1730,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState + *vms, +&vms->highmem_redists, +&vms->highmem_ecam, +&vms->highmem_mmio, ++ &vms->cxl_devices_state.is_enabled, +}; +Now qemu works good. +Could you tell me when the patch( +arm/virt: + CXL support via pxb_cxl +) will be merged into upstream? 
+ diff --git a/results/classifier/016/debug/74545755 b/results/classifier/016/debug/74545755 new file mode 100644 index 00000000..7a117222 --- /dev/null +++ b/results/classifier/016/debug/74545755 @@ -0,0 +1,371 @@ +debug: 0.973 +virtual: 0.913 +hypervisor: 0.760 +operating system: 0.581 +kernel: 0.276 +x86: 0.136 +PID: 0.132 +files: 0.051 +register: 0.046 +VMM: 0.042 +TCG: 0.035 +user-level: 0.019 +KVM: 0.014 +performance: 0.009 +semantic: 0.009 +risc-v: 0.007 +assembly: 0.007 +device: 0.006 +ppc: 0.004 +alpha: 0.004 +network: 0.004 +socket: 0.002 +architecture: 0.001 +graphic: 0.001 +vnc: 0.001 +permissions: 0.001 +peripherals: 0.001 +boot: 0.001 +arm: 0.001 +i386: 0.001 +mistranslation: 0.000 + +[Bug Report][RFC PATCH 0/1] block: fix failing assert on paused VM migration + +There's a bug (failing assert) which is reproduced during migration of +a paused VM. I am able to reproduce it on a stand with 2 nodes and a common +NFS share, with VM's disk on that share. + +root@fedora40-1-vm:~# virsh domblklist alma8-vm + Target Source +------------------------------------------ + sda /mnt/shared/images/alma8.qcow2 + +root@fedora40-1-vm:~# df -Th /mnt/shared +Filesystem Type Size Used Avail Use% Mounted on +127.0.0.1:/srv/nfsd nfs4 63G 16G 48G 25% /mnt/shared + +On the 1st node: + +root@fedora40-1-vm:~# virsh start alma8-vm ; virsh suspend alma8-vm +root@fedora40-1-vm:~# virsh migrate --compressed --p2p --persistent +--undefinesource --live alma8-vm qemu+ssh://fedora40-2-vm/system + +Then on the 2nd node: + +root@fedora40-2-vm:~# virsh migrate --compressed --p2p --persistent +--undefinesource --live alma8-vm qemu+ssh://fedora40-1-vm/system +error: operation failed: domain is not running + +root@fedora40-2-vm:~# tail -3 /var/log/libvirt/qemu/alma8-vm.log +2024-09-19 13:53:33.336+0000: initiating migration +qemu-system-x86_64: ../block.c:6976: int +bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & +BDRV_O_INACTIVE)' failed. 
+2024-09-19 13:53:42.991+0000: shutting down, reason=crashed + +Backtrace: + +(gdb) bt +#0 0x00007f7eaa2f1664 in __pthread_kill_implementation () at /lib64/libc.so.6 +#1 0x00007f7eaa298c4e in raise () at /lib64/libc.so.6 +#2 0x00007f7eaa280902 in abort () at /lib64/libc.so.6 +#3 0x00007f7eaa28081e in __assert_fail_base.cold () at /lib64/libc.so.6 +#4 0x00007f7eaa290d87 in __assert_fail () at /lib64/libc.so.6 +#5 0x0000563c38b95eb8 in bdrv_inactivate_recurse (bs=0x563c3b6c60c0) at +../block.c:6976 +#6 0x0000563c38b95aeb in bdrv_inactivate_all () at ../block.c:7038 +#7 0x0000563c3884d354 in qemu_savevm_state_complete_precopy_non_iterable +(f=0x563c3b700c20, in_postcopy=false, inactivate_disks=true) + at ../migration/savevm.c:1571 +#8 0x0000563c3884dc1a in qemu_savevm_state_complete_precopy (f=0x563c3b700c20, +iterable_only=false, inactivate_disks=true) at ../migration/savevm.c:1631 +#9 0x0000563c3883a340 in migration_completion_precopy (s=0x563c3b4d51f0, +current_active_state=<optimized out>) at ../migration/migration.c:2780 +#10 migration_completion (s=0x563c3b4d51f0) at ../migration/migration.c:2844 +#11 migration_iteration_run (s=0x563c3b4d51f0) at ../migration/migration.c:3270 +#12 migration_thread (opaque=0x563c3b4d51f0) at ../migration/migration.c:3536 +#13 0x0000563c38dbcf14 in qemu_thread_start (args=0x563c3c2d5bf0) at +../util/qemu-thread-posix.c:541 +#14 0x00007f7eaa2ef6d7 in start_thread () at /lib64/libc.so.6 +#15 0x00007f7eaa373414 in clone () at /lib64/libc.so.6 + +What happens here is that after 1st migration BDS related to HDD remains +inactive as VM is still paused. Then when we initiate 2nd migration, +bdrv_inactivate_all() leads to the attempt to set BDRV_O_INACTIVE flag +on that node which is already set, thus assert fails. + +Attached patch which simply skips setting flag if it's already set is more +of a kludge than a clean solution. 
Should we use more sophisticated logic +which allows some of the nodes be in inactive state prior to the migration, +and takes them into account during bdrv_inactivate_all()? Comments would +be appreciated. + +Andrey + +Andrey Drobyshev (1): + block: do not fail when inactivating node which is inactive + + block.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +-- +2.39.3 + +Instead of throwing an assert let's just ignore that flag is already set +and return. We assume that it's going to be safe to ignore. Otherwise +this assert fails when migrating a paused VM back and forth. + +Ideally we'd like to have a more sophisticated solution, e.g. not even +scan the nodes which should be inactive at this point. + +Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> +--- + block.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 7d90007cae..c1dcf906d1 100644 +--- a/block.c ++++ b/block.c +@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK +bdrv_inactivate_recurse(BlockDriverState *bs) + return 0; + } + +- assert(!(bs->open_flags & BDRV_O_INACTIVE)); ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ /* ++ * Return here instead of throwing assert as a workaround to ++ * prevent failure on migrating paused VM. ++ * Here we assume that if we're trying to inactivate BDS that's ++ * already inactive, it's safe to just ignore it. ++ */ ++ return 0; ++ } + + /* Inactivate this node */ + if (bs->drv->bdrv_inactivate) { +-- +2.39.3 + +[add migration maintainers] + +On 24.09.24 15:56, Andrey Drobyshev wrote: +Instead of throwing an assert let's just ignore that flag is already set +and return. We assume that it's going to be safe to ignore. Otherwise +this assert fails when migrating a paused VM back and forth. + +Ideally we'd like to have a more sophisticated solution, e.g. not even +scan the nodes which should be inactive at this point. 
+ +Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> +--- + block.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 7d90007cae..c1dcf906d1 100644 +--- a/block.c ++++ b/block.c +@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK +bdrv_inactivate_recurse(BlockDriverState *bs) + return 0; + } +- assert(!(bs->open_flags & BDRV_O_INACTIVE)); ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ /* ++ * Return here instead of throwing assert as a workaround to ++ * prevent failure on migrating paused VM. ++ * Here we assume that if we're trying to inactivate BDS that's ++ * already inactive, it's safe to just ignore it. ++ */ ++ return 0; ++ } +/* Inactivate this node */ +if (bs->drv->bdrv_inactivate) { +I doubt that this a correct way to go. + +As far as I understand, "inactive" actually means that "storage is not belong to +qemu, but to someone else (another qemu process for example), and may be changed +transparently". In turn this means that Qemu should do nothing with inactive disks. So the +problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. + +Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), +but only in some scenarios. May be, the condition should be less strict here. + +Why we need any condition here at all? Don't we want to activate block-layer on +target after migration anyway? + +-- +Best regards, +Vladimir + +On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote: +> +[add migration maintainers] +> +> +On 24.09.24 15:56, Andrey Drobyshev wrote: +> +> [...] +> +> +I doubt that this a correct way to go. +> +> +As far as I understand, "inactive" actually means that "storage is not +> +belong to qemu, but to someone else (another qemu process for example), +> +and may be changed transparently". In turn this means that Qemu should +> +do nothing with inactive disks. 
So the problem is that nobody called +> +bdrv_activate_all on target, and we shouldn't ignore that. +> +> +Hmm, I see in process_incoming_migration_bh() we do call +> +bdrv_activate_all(), but only in some scenarios. May be, the condition +> +should be less strict here. +> +> +Why we need any condition here at all? Don't we want to activate +> +block-layer on target after migration anyway? +> +Hmm I'm not sure about the unconditional activation, since we at least +have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it +in such a case). In current libvirt upstream I see such code: + +> +/* Migration capabilities which should always be enabled as long as they +> +> +* are supported by QEMU. If the capability is supposed to be enabled on both +> +> +* sides of migration, it won't be enabled unless both sides support it. +> +> +*/ +> +> +static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = +> +{ +> +> +{QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER, +> +> +QEMU_MIGRATION_SOURCE}, +> +> +> +> +{QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE, +> +> +QEMU_MIGRATION_DESTINATION}, +> +> +}; +which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set. + +The code from process_incoming_migration_bh() you're referring to: + +> +/* If capability late_block_activate is set: +> +> +* Only fire up the block code now if we're going to restart the +> +> +* VM, else 'cont' will do it. +> +> +* This causes file locking to happen; so we don't want it to happen +> +> +* unless we really are starting the VM. +> +> +*/ +> +> +if (!migrate_late_block_activate() || +> +> +(autostart && (!global_state_received() || +> +> +runstate_is_live(global_state_get_runstate())))) { +> +> +/* Make sure all file formats throw away their mutable metadata. +> +> +> +* If we get an error here, just don't restart the VM yet. 
*/ +> +> +bdrv_activate_all(&local_err); +> +> +if (local_err) { +> +> +error_report_err(local_err); +> +> +local_err = NULL; +> +> +autostart = false; +> +> +} +> +> +} +It states explicitly that we're either going to start VM right at this +point if (autostart == true), or we wait till "cont" command happens. +None of this is going to happen if we start another migration while +still being in PAUSED state. So I think it seems reasonable to take +such case into account. For instance, this patch does prevent the crash: + +> +diff --git a/migration/migration.c b/migration/migration.c +> +index ae2be31557..3222f6745b 100644 +> +--- a/migration/migration.c +> ++++ b/migration/migration.c +> +@@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque) +> +*/ +> +if (!migrate_late_block_activate() || +> +(autostart && (!global_state_received() || +> +- runstate_is_live(global_state_get_runstate())))) { +> ++ runstate_is_live(global_state_get_runstate()))) || +> ++ (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) { +> +/* Make sure all file formats throw away their mutable metadata. +> +* If we get an error here, just don't restart the VM yet. */ +> +bdrv_activate_all(&local_err); +What are your thoughts on it? 
+ +Andrey + diff --git a/results/classifier/016/debug/80570214 b/results/classifier/016/debug/80570214 new file mode 100644 index 00000000..5cf23bfd --- /dev/null +++ b/results/classifier/016/debug/80570214 @@ -0,0 +1,427 @@ +debug: 0.918 +x86: 0.601 +hypervisor: 0.441 +operating system: 0.315 +kernel: 0.215 +user-level: 0.171 +virtual: 0.147 +PID: 0.085 +files: 0.068 +network: 0.061 +TCG: 0.046 +performance: 0.032 +i386: 0.031 +assembly: 0.025 +register: 0.024 +KVM: 0.015 +socket: 0.011 +semantic: 0.010 +ppc: 0.010 +arm: 0.007 +vnc: 0.005 +device: 0.005 +risc-v: 0.005 +VMM: 0.004 +architecture: 0.004 +graphic: 0.002 +alpha: 0.002 +permissions: 0.002 +peripherals: 0.001 +boot: 0.001 +mistranslation: 0.001 + +[Qemu-devel] [vhost-user BUG ?] QEMU process segfault when shutdown or reboot with vhost-user + +Hi, + +We catch a segfault in our project. + +Qemu version is 2.3.0 + +The Stack backtrace is: +(gdb) bt +#0 0x0000000000000000 in ?? () +#1 0x00007f7ad9280b2f in qemu_deliver_packet (sender=<optimized out>, flags=<optimized +out>, data=<optimized out>, size=100, opaque= + 0x7f7ad9d6db10) at net/net.c:510 +#2 0x00007f7ad92831fa in qemu_net_queue_deliver (size=<optimized out>, data=<optimized +out>, flags=<optimized out>, + sender=<optimized out>, queue=<optimized out>) at net/queue.c:157 +#3 qemu_net_queue_flush (queue=0x7f7ad9d39630) at net/queue.c:254 +#4 0x00007f7ad9280dac in qemu_flush_or_purge_queued_packets +(nc=0x7f7ad9d6db10, purge=true) at net/net.c:539 +#5 0x00007f7ad9280e76 in net_vm_change_state_handler (opaque=<optimized out>, +running=<optimized out>, state=100) at net/net.c:1214 +#6 0x00007f7ad915612f in vm_state_notify (running=0, state=RUN_STATE_SHUTDOWN) +at vl.c:1820 +#7 0x00007f7ad906db1a in do_vm_stop (state=<optimized out>) at +/usr/src/packages/BUILD/qemu-kvm-2.3.0/cpus.c:631 +#8 vm_stop (state=RUN_STATE_SHUTDOWN) at +/usr/src/packages/BUILD/qemu-kvm-2.3.0/cpus.c:1325 +#9 0x00007f7ad915e4a2 in main_loop_should_exit () at vl.c:2080 +#10 
main_loop () at vl.c:2131 +#11 main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at +vl.c:4721 +(gdb) p *(NetClientState *)0x7f7ad9d6db10 +$1 = {info = 0x7f7ad9824520, link_down = 0, next = {tqe_next = 0x7f7ad0f06d10, +tqe_prev = 0x7f7ad98b1cf0}, peer = 0x7f7ad0f06d10, + incoming_queue = 0x7f7ad9d39630, model = 0x7f7ad9d39590 "vhost_user", name = +0x7f7ad9d39570 "hostnet0", info_str = + "vhost-user to charnet0", '\000' <repeats 233 times>, receive_disabled = 0, +destructor = + 0x7f7ad92821f0 <qemu_net_client_destructor>, queue_index = 0, +rxfilter_notify_enabled = 0} +(gdb) p *(NetClientInfo *)0x7f7ad9824520 +$2 = {type = NET_CLIENT_OPTIONS_KIND_VHOST_USER, size = 360, receive = 0, +receive_raw = 0, receive_iov = 0, can_receive = 0, cleanup = + 0x7f7ad9288850 <vhost_user_cleanup>, link_status_changed = 0, +query_rx_filter = 0, poll = 0, has_ufo = + 0x7f7ad92886d0 <vhost_user_has_ufo>, has_vnet_hdr = 0x7f7ad9288670 +<vhost_user_has_vnet_hdr>, has_vnet_hdr_len = 0, + using_vnet_hdr = 0, set_offload = 0, set_vnet_hdr_len = 0} +(gdb) + +The corresponding codes where gdb reports error are: (We have added some codes +in net.c) +ssize_t qemu_deliver_packet(NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + void *opaque) +{ + NetClientState *nc = opaque; + ssize_t ret; + + if (nc->link_down) { + return size; + } + + if (nc->receive_disabled) { + return 0; + } + + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + ret = nc->info->receive_raw(nc, data, size); + } else { + ret = nc->info->receive(nc, data, size); ----> Here is 510 line + } + +I'm not quite familiar with vhost-user, but for vhost-user, these two callback +functions seem to be always NULL, +Why we can come here ? +Is it an error to add VM state change handler for vhost-user ? 
+ +Thanks, +zhanghailiang + +Hi + +On Tue, Nov 3, 2015 at 2:01 PM, zhanghailiang +<address@hidden> wrote: +> +The corresponding codes where gdb reports error are: (We have added some +> +codes in net.c) +Can you reproduce with unmodified qemu? Could you give instructions to do so? + +> +ssize_t qemu_deliver_packet(NetClientState *sender, +> +unsigned flags, +> +const uint8_t *data, +> +size_t size, +> +void *opaque) +> +{ +> +NetClientState *nc = opaque; +> +ssize_t ret; +> +> +if (nc->link_down) { +> +return size; +> +} +> +> +if (nc->receive_disabled) { +> +return 0; +> +} +> +> +if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { +> +ret = nc->info->receive_raw(nc, data, size); +> +} else { +> +ret = nc->info->receive(nc, data, size); ----> Here is 510 line +> +} +> +> +I'm not quite familiar with vhost-user, but for vhost-user, these two +> +callback functions seem to be always NULL, +> +Why we can come here ? +You should not come here, vhost-user has nc->receive_disabled (it +changes in 2.5) + +-- +Marc-André Lureau + +On 2015/11/3 22:54, Marc-André Lureau wrote: +Hi + +On Tue, Nov 3, 2015 at 2:01 PM, zhanghailiang +<address@hidden> wrote: +The corresponding codes where gdb reports error are: (We have added some +codes in net.c) +Can you reproduce with unmodified qemu? Could you give instructions to do so? +OK, i will try to do it. There is nothing special, we run iperf tool in VM, +and then shutdown or reboot it. There is change you can catch segfault. 
+ssize_t qemu_deliver_packet(NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + void *opaque) +{ + NetClientState *nc = opaque; + ssize_t ret; + + if (nc->link_down) { + return size; + } + + if (nc->receive_disabled) { + return 0; + } + + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + ret = nc->info->receive_raw(nc, data, size); + } else { + ret = nc->info->receive(nc, data, size); ----> Here is 510 line + } + +I'm not quite familiar with vhost-user, but for vhost-user, these two +callback functions seem to be always NULL, +Why we can come here ? +You should not come here, vhost-user has nc->receive_disabled (it +changes in 2.5) +I have looked at the newest codes, i think we can still have chance to +come here, since we will change nc->receive_disable to false temporarily in +qemu_flush_or_purge_queued_packets(), there is no difference between 2.3 and 2.5 +for this. +Besides, is it possible for !QTAILQ_EMPTY(&queue->packets) to be true +in qemu_net_queue_flush() for vhost-user ? + +i will try to reproduce it by using newest qemu. + +Thanks, +zhanghailiang + +On 11/04/2015 10:24 AM, zhanghailiang wrote: +> +On 2015/11/3 22:54, Marc-André Lureau wrote: +> +> Hi +> +> +> +> On Tue, Nov 3, 2015 at 2:01 PM, zhanghailiang +> +> <address@hidden> wrote: +> +>> The corresponding codes where gdb reports error are: (We have added +> +>> some +> +>> codes in net.c) +> +> +> +> Can you reproduce with unmodified qemu? Could you give instructions +> +> to do so? +> +> +> +> +OK, i will try to do it. There is nothing special, we run iperf tool +> +in VM, +> +and then shutdown or reboot it. There is change you can catch segfault. 
+> +> +>> ssize_t qemu_deliver_packet(NetClientState *sender, +> +>> unsigned flags, +> +>> const uint8_t *data, +> +>> size_t size, +> +>> void *opaque) +> +>> { +> +>> NetClientState *nc = opaque; +> +>> ssize_t ret; +> +>> +> +>> if (nc->link_down) { +> +>> return size; +> +>> } +> +>> +> +>> if (nc->receive_disabled) { +> +>> return 0; +> +>> } +> +>> +> +>> if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { +> +>> ret = nc->info->receive_raw(nc, data, size); +> +>> } else { +> +>> ret = nc->info->receive(nc, data, size); ----> Here is +> +>> 510 line +> +>> } +> +>> +> +>> I'm not quite familiar with vhost-user, but for vhost-user, these two +> +>> callback functions seem to be always NULL, +> +>> Why we can come here ? +> +> +> +> You should not come here, vhost-user has nc->receive_disabled (it +> +> changes in 2.5) +> +> +> +> +I have looked at the newest codes, i think we can still have chance to +> +come here, since we will change nc->receive_disable to false +> +temporarily in +> +qemu_flush_or_purge_queued_packets(), there is no difference between +> +2.3 and 2.5 +> +for this. +> +Besides, is it possible for !QTAILQ_EMPTY(&queue->packets) to be true +> +in qemu_net_queue_flush() for vhost-user ? +The only thing I can image is self announcing. Are you trying to do +migration? 2.5 only support sending rarp through this. + +And it's better to have a breakpoint to see why a packet was queued for +vhost-user. The stack trace may also help in this case. + +> +> +i will try to reproduce it by using newest qemu. +> +> +Thanks, +> +zhanghailiang +> + +On 2015/11/4 11:19, Jason Wang wrote: +On 11/04/2015 10:24 AM, zhanghailiang wrote: +On 2015/11/3 22:54, Marc-André Lureau wrote: +Hi + +On Tue, Nov 3, 2015 at 2:01 PM, zhanghailiang +<address@hidden> wrote: +The corresponding codes where gdb reports error are: (We have added +some +codes in net.c) +Can you reproduce with unmodified qemu? Could you give instructions +to do so? 
+OK, i will try to do it. There is nothing special, we run iperf tool +in VM, +and then shutdown or reboot it. There is change you can catch segfault. +ssize_t qemu_deliver_packet(NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + void *opaque) +{ + NetClientState *nc = opaque; + ssize_t ret; + + if (nc->link_down) { + return size; + } + + if (nc->receive_disabled) { + return 0; + } + + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + ret = nc->info->receive_raw(nc, data, size); + } else { + ret = nc->info->receive(nc, data, size); ----> Here is +510 line + } + +I'm not quite familiar with vhost-user, but for vhost-user, these two +callback functions seem to be always NULL, +Why we can come here ? +You should not come here, vhost-user has nc->receive_disabled (it +changes in 2.5) +I have looked at the newest codes, i think we can still have chance to +come here, since we will change nc->receive_disable to false +temporarily in +qemu_flush_or_purge_queued_packets(), there is no difference between +2.3 and 2.5 +for this. +Besides, is it possible for !QTAILQ_EMPTY(&queue->packets) to be true +in qemu_net_queue_flush() for vhost-user ? +The only thing I can image is self announcing. Are you trying to do +migration? 2.5 only support sending rarp through this. +Hmm, it's not triggered by migration, For qemu-2.5, IMHO, it doesn't have such +problem, +since the callback function 'receive' is not NULL. It is vhost_user_receive(). +And it's better to have a breakpoint to see why a packet was queued for +vhost-user. The stack trace may also help in this case. +OK, i'm trying to reproduce it. + +Thanks, +zhanghailiang +i will try to reproduce it by using newest qemu. + +Thanks, +zhanghailiang +. 
+ diff --git a/results/classifier/016/debug/85542195 b/results/classifier/016/debug/85542195 new file mode 100644 index 00000000..3f21ebe1 --- /dev/null +++ b/results/classifier/016/debug/85542195 @@ -0,0 +1,147 @@ +debug: 0.876 +ppc: 0.654 +virtual: 0.577 +register: 0.396 +TCG: 0.331 +PID: 0.322 +user-level: 0.317 +x86: 0.294 +operating system: 0.258 +hypervisor: 0.219 +risc-v: 0.193 +socket: 0.176 +boot: 0.169 +network: 0.127 +device: 0.126 +vnc: 0.121 +alpha: 0.096 +VMM: 0.094 +files: 0.088 +i386: 0.049 +semantic: 0.024 +kernel: 0.013 +assembly: 0.011 +performance: 0.007 +peripherals: 0.004 +permissions: 0.004 +KVM: 0.003 +architecture: 0.003 +arm: 0.001 +graphic: 0.001 +mistranslation: 0.001 + +[Qemu-devel] [Bug in qemu-system-ppc running Mac OS 9 on Windows 10] + +Hi all, + +I've been experiencing issues when installing Mac OS 9.x using +qemu-system-ppc.exe in Windows 10. After booting from CD image, +partitioning a fresh disk image often hangs Qemu. When using a +pre-partitioned disk image, the OS installation process halts +somewhere during the process. The issues can be resolved by setting +qemu-system-ppc.exe to run in Windows 7 compatibility mode. +AFAIK all Qemu builds for Windows since Mac OS 9 became available as +guest are affected. +The issue is reproducible by installing Qemu for Windows from Stephan +Weil on Windows 10 and boot/install Mac OS 9.x + +Best regards and thanks for looking into this, +Howard + +On Nov 25, 2016, at 9:26 AM, address@hidden wrote: +Hi all, + +I've been experiencing issues when installing Mac OS 9.x using +qemu-system-ppc.exe in Windows 10. After booting from CD image, +partitioning a fresh disk image often hangs Qemu. When using a +pre-partitioned disk image, the OS installation process halts +somewhere during the process. The issues can be resolved by setting +qemu-system-ppc.exe to run in Windows 7 compatibility mode. +AFAIK all Qemu builds for Windows since Mac OS 9 became available as +guest are affected. 
+The issue is reproducible by installing Qemu for Windows from Stephan +Weil on Windows 10 and boot/install Mac OS 9.x + +Best regards and thanks for looking into this, +Howard +I assume there was some kind of behavior change for some of the +Windows API between Windows 7 and Windows 10, that is my guess as to +why the compatibility mode works. Could you run 'make check' on your +system, once in Windows 7 and once in Windows 10. Maybe the tests +will tell us something. I'm hoping that one of the tests succeeds in +Windows 7 and fails in Windows 10. That would help us pinpoint what +the problem is. +What I mean by run in Windows 7 is set the mingw environment to run +in Windows 7 compatibility mode (if possible). If you have Windows 7 +on another partition you could boot from, that would be better. +Good luck. +p.s. use 'make check -k' to allow all the tests to run (even if one +or more of the tests fails). + +> +> Hi all, +> +> +> +> I've been experiencing issues when installing Mac OS 9.x using +> +> qemu-system-ppc.exe in Windows 10. After booting from CD image, +> +> partitioning a fresh disk image often hangs Qemu. When using a +> +> pre-partitioned disk image, the OS installation process halts +> +> somewhere during the process. The issues can be resolved by setting +> +> qemu-system-ppc.exe to run in Windows 7 compatibility mode. +> +> AFAIK all Qemu builds for Windows since Mac OS 9 became available as +> +> guest are affected. +> +> The issue is reproducible by installing Qemu for Windows from Stephan +> +> Weil on Windows 10 and boot/install Mac OS 9.x +> +> +> +> Best regards and thanks for looking into this, +> +> Howard +> +> +> +I assume there was some kind of behavior change for some of the Windows API +> +between Windows 7 and Windows 10, that is my guess as to why the +> +compatibility mode works. Could you run 'make check' on your system, once in +> +Windows 7 and once in Windows 10. Maybe the tests will tell us something. 
+> +I'm hoping that one of the tests succeeds in Windows 7 and fails in Windows +> +10. That would help us pinpoint what the problem is. +> +> +What I mean by run in Windows 7 is set the mingw environment to run in +> +Windows 7 compatibility mode (if possible). If you have Windows 7 on another +> +partition you could boot from, that would be better. +> +> +Good luck. +> +> +p.s. use 'make check -k' to allow all the tests to run (even if one or more +> +of the tests fails). +Hi, + +Thank you for you suggestion, but I have no means to run the check you +suggest. I cross-compile from Linux. + +Best regards, +Howard + diff --git a/results/classifier/016/debug/88225572 b/results/classifier/016/debug/88225572 new file mode 100644 index 00000000..855d0aa1 --- /dev/null +++ b/results/classifier/016/debug/88225572 @@ -0,0 +1,2927 @@ +debug: 0.966 +hypervisor: 0.541 +kernel: 0.484 +x86: 0.394 +user-level: 0.321 +KVM: 0.289 +operating system: 0.257 +virtual: 0.215 +TCG: 0.119 +PID: 0.087 +files: 0.078 +assembly: 0.047 +register: 0.046 +semantic: 0.039 +performance: 0.032 +i386: 0.031 +device: 0.030 +VMM: 0.021 +ppc: 0.015 +architecture: 0.007 +peripherals: 0.007 +arm: 0.006 +risc-v: 0.006 +network: 0.003 +vnc: 0.003 +alpha: 0.002 +graphic: 0.002 +socket: 0.002 +permissions: 0.001 +boot: 0.001 +mistranslation: 0.001 + +[BUG qemu 4.0] segfault when unplugging virtio-blk-pci device + +Hi, + +I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +think it's because io completion hits use-after-free when device is +already gone. Is this a known bug that has been fixed? (I went through +the git log but didn't find anything obvious). + +gdb backtrace is: + +Core was generated by `/usr/local/libexec/qemu-kvm -name +sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +Program terminated with signal 11, Segmentation fault. 
+#0 object_get_class (obj=obj@entry=0x0) at +/usr/src/debug/qemu-4.0/qom/object.c:903 +903 return obj->class; +(gdb) bt +#0 object_get_class (obj=obj@entry=0x0) at +/usr/src/debug/qemu-4.0/qom/object.c:903 +#1  0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +  vector=<optimized out>) at /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +#2  0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +  opaque=0x558a2f2fd420, ret=0) +  at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +#3  0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +  at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +#4  0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +  i1=<optimized out>) at /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +#5  0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +#6  0x00007fff9ed75780 in ?? () +#7  0x0000000000000000 in ?? () + +It seems like qemu was completing a discard/write_zero request, but +parent BusState was already freed & set to NULL. + +Do we need to drain all pending request before unrealizing virtio-blk +device? Like the following patch proposed? +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +If more info is needed, please let me know. + +Thanks, +Eryu + +On Tue, 31 Dec 2019 18:34:34 +0800 +Eryu Guan <address@hidden> wrote: + +> +Hi, +> +> +I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +> +think it's because io completion hits use-after-free when device is +> +already gone. Is this a known bug that has been fixed? (I went through +> +the git log but didn't find anything obvious). +> +> +gdb backtrace is: +> +> +Core was generated by `/usr/local/libexec/qemu-kvm -name +> +sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +Program terminated with signal 11, Segmentation fault. 
+> +#0 object_get_class (obj=obj@entry=0x0) at +> +/usr/src/debug/qemu-4.0/qom/object.c:903 +> +903 return obj->class; +> +(gdb) bt +> +#0 object_get_class (obj=obj@entry=0x0) at +> +/usr/src/debug/qemu-4.0/qom/object.c:903 +> +#1  0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +  vector=<optimized out>) at /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +#2  0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +  opaque=0x558a2f2fd420, ret=0) +> +  at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +#3  0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +  at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +#4  0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +  i1=<optimized out>) at +> +/usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +#5  0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +#6  0x00007fff9ed75780 in ?? () +> +#7  0x0000000000000000 in ?? () +> +> +It seems like qemu was completing a discard/write_zero request, but +> +parent BusState was already freed & set to NULL. +> +> +Do we need to drain all pending request before unrealizing virtio-blk +> +device? Like the following patch proposed? +> +> +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> +If more info is needed, please let me know. +may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> +Thanks, +> +Eryu +> + +On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +On Tue, 31 Dec 2019 18:34:34 +0800 +> +Eryu Guan <address@hidden> wrote: +> +> +> Hi, +> +> +> +> I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +> +> think it's because io completion hits use-after-free when device is +> +> already gone. Is this a known bug that has been fixed? (I went through +> +> the git log but didn't find anything obvious). 
+> +> +> +> gdb backtrace is: +> +> +> +> Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> Program terminated with signal 11, Segmentation fault. +> +> #0 object_get_class (obj=obj@entry=0x0) at +> +> /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> 903 return obj->class; +> +> (gdb) bt +> +> #0 object_get_class (obj=obj@entry=0x0) at +> +> /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> #1  0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +>   vector=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> #2  0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +>   opaque=0x558a2f2fd420, ret=0) +> +>   at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> #3  0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +>   at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> #4  0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +>   i1=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> #5  0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> #6  0x00007fff9ed75780 in ?? () +> +> #7  0x0000000000000000 in ?? () +> +> +> +> It seems like qemu was completing a discard/write_zero request, but +> +> parent BusState was already freed & set to NULL. +> +> +> +> Do we need to drain all pending request before unrealizing virtio-blk +> +> device? Like the following patch proposed? +> +> +> +> +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> +> +> If more info is needed, please let me know. +> +> +may be this will help: +https://patchwork.kernel.org/patch/11213047/ +Yeah, this looks promising! I'll try it out (though it's a one-time +crash for me). Thanks! 
+ +Eryu + +On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> On Tue, 31 Dec 2019 18:34:34 +0800 +> +> Eryu Guan <address@hidden> wrote: +> +> +> +> > Hi, +> +> > +> +> > I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +> +> > think it's because io completion hits use-after-free when device is +> +> > already gone. Is this a known bug that has been fixed? (I went through +> +> > the git log but didn't find anything obvious). +> +> > +> +> > gdb backtrace is: +> +> > +> +> > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > Program terminated with signal 11, Segmentation fault. +> +> > #0 object_get_class (obj=obj@entry=0x0) at +> +> > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > 903 return obj->class; +> +> > (gdb) bt +> +> > #0 object_get_class (obj=obj@entry=0x0) at +> +> > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > #1  0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +> >   vector=<optimized out>) at +> +> > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > #2  0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +> >   opaque=0x558a2f2fd420, ret=0) +> +> >   at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > #3  0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> >   at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > #4  0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +> >   i1=<optimized out>) at +> +> > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > #5  0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > #6  0x00007fff9ed75780 in ?? () +> +> > #7  0x0000000000000000 in ?? () +> +> > +> +> > It seems like qemu was completing a discard/write_zero request, but +> +> > parent BusState was already freed & set to NULL. 
+> +> > +> +> > Do we need to drain all pending request before unrealizing virtio-blk +> +> > device? Like the following patch proposed? +> +> > +> +> > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > +> +> > If more info is needed, please let me know. +> +> +> +> may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> +Yeah, this looks promising! I'll try it out (though it's a one-time +> +crash for me). Thanks! +After applying this patch, I don't see the original segfaut and +backtrace, but I see this crash + +[Thread debugging using libthread_db enabled] +Using host libthread_db library "/lib64/libthread_db.so.1". +Core was generated by `/usr/local/libexec/qemu-kvm -name +sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +Program terminated with signal 11, Segmentation fault. +#0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +addr=0, val=<optimized out>, size=<optimized out>) at +/usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +1324 VirtIOPCIProxy *proxy = +VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +Missing separate debuginfos, use: debuginfo-install +glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +libstdc++-4.8.5-28.alios7.1.x86_64 numactl-libs-2.0.9-5.1.alios7.x86_64 +pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +(gdb) bt +#0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +addr=0, val=<optimized out>, size=<optimized out>) at +/usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +#1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized out>, +addr=<optimized out>, value=<optimized out>, size=<optimized out>, +shift=<optimized out>, mask=<optimized out>, attrs=...) 
at +/usr/src/debug/qemu-4.0/memory.c:502 +#2 0x0000561216833c5d in access_with_adjusted_size (addr=addr@entry=0, +value=value@entry=0x7fcdeab1b8a8, size=size@entry=2, access_size_min=<optimized +out>, access_size_max=<optimized out>, access_fn=0x561216835ac0 +<memory_region_write_accessor>, mr=0x56121846d340, attrs=...) + at /usr/src/debug/qemu-4.0/memory.c:568 +#3 0x0000561216837c66 in memory_region_dispatch_write +(mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +#4 0x00005612167e036f in flatview_write_continue (fv=fv@entry=0x56121852edd0, +addr=addr@entry=841813602304, attrs=..., buf=buf@entry=0x7fce7dd97028 <Address +0x7fce7dd97028 out of bounds>, len=len@entry=2, addr1=<optimized out>, +l=<optimized out>, mr=0x56121846d340) + at /usr/src/debug/qemu-4.0/exec.c:3279 +#5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, addr=841813602304, +attrs=..., buf=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, len=2) at +/usr/src/debug/qemu-4.0/exec.c:3318 +#6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) at +/usr/src/debug/qemu-4.0/exec.c:3408 +#7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, addr=<optimized +out>, attrs=..., attrs@entry=..., buf=buf@entry=0x7fce7dd97028 <Address +0x7fce7dd97028 out of bounds>, len=<optimized out>, is_write=<optimized out>) +at /usr/src/debug/qemu-4.0/exec.c:3419 +#8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) at +/usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +#9 0x000056121682255e in qemu_kvm_cpu_thread_fn (arg=arg@entry=0x56121849aa00) +at /usr/src/debug/qemu-4.0/cpus.c:1281 +#10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +/usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +#11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +#12 0x00007fce7bc1ef1d in clone () from 
/lib64/libc.so.6 + +And I searched and found +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the same +backtrace as above, and it seems commit 7bfde688fb1b ("virtio-blk: Add +blk_drain() to virtio_blk_device_unrealize()") is to fix this particular +bug. + +But I can still hit the bug even after applying the commit. Do I miss +anything? + +Thanks, +Eryu +> +Eryu + +On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> +On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > Eryu Guan <address@hidden> wrote: +> +> > +> +> > > Hi, +> +> > > +> +> > > I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +> +> > > think it's because io completion hits use-after-free when device is +> +> > > already gone. Is this a known bug that has been fixed? (I went through +> +> > > the git log but didn't find anything obvious). +> +> > > +> +> > > gdb backtrace is: +> +> > > +> +> > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > Program terminated with signal 11, Segmentation fault. 
+> +> > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > 903 return obj->class; +> +> > > (gdb) bt +> +> > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > #1 0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +> > > vector=<optimized out>) at +> +> > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > #2 0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +> > > opaque=0x558a2f2fd420, ret=0) +> +> > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > #3 0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +> > > i1=<optimized out>) at +> +> > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > #6 0x00007fff9ed75780 in ?? () +> +> > > #7 0x0000000000000000 in ?? () +> +> > > +> +> > > It seems like qemu was completing a discard/write_zero request, but +> +> > > parent BusState was already freed & set to NULL. +> +> > > +> +> > > Do we need to drain all pending request before unrealizing virtio-blk +> +> > > device? Like the following patch proposed? +> +> > > +> +> > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > +> +> > > If more info is needed, please let me know. +> +> > +> +> > may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> +> +> Yeah, this looks promising! I'll try it out (though it's a one-time +> +> crash for me). Thanks! +> +> +After applying this patch, I don't see the original segfaut and +> +backtrace, but I see this crash +> +> +[Thread debugging using libthread_db enabled] +> +Using host libthread_db library "/lib64/libthread_db.so.1". 
+> +Core was generated by `/usr/local/libexec/qemu-kvm -name +> +sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +Program terminated with signal 11, Segmentation fault. +> +#0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +addr=0, val=<optimized out>, size=<optimized out>) at +> +/usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +1324 VirtIOPCIProxy *proxy = +> +VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +Missing separate debuginfos, use: debuginfo-install +> +glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +libstdc++-4.8.5-28.alios7.1.x86_64 numactl-libs-2.0.9-5.1.alios7.x86_64 +> +pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +> +(gdb) bt +> +#0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +addr=0, val=<optimized out>, size=<optimized out>) at +> +/usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +#1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized out>, +> +addr=<optimized out>, value=<optimized out>, size=<optimized out>, +> +shift=<optimized out>, mask=<optimized out>, attrs=...) at +> +/usr/src/debug/qemu-4.0/memory.c:502 +> +#2 0x0000561216833c5d in access_with_adjusted_size (addr=addr@entry=0, +> +value=value@entry=0x7fcdeab1b8a8, size=size@entry=2, +> +access_size_min=<optimized out>, access_size_max=<optimized out>, +> +access_fn=0x561216835ac0 <memory_region_write_accessor>, mr=0x56121846d340, +> +attrs=...) +> +at /usr/src/debug/qemu-4.0/memory.c:568 +> +#3 0x0000561216837c66 in memory_region_dispatch_write +> +(mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +attrs=attrs@entry=...) 
at /usr/src/debug/qemu-4.0/memory.c:1503 +> +#4 0x00005612167e036f in flatview_write_continue +> +(fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, attrs=..., +> +buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +len=len@entry=2, addr1=<optimized out>, l=<optimized out>, mr=0x56121846d340) +> +at /usr/src/debug/qemu-4.0/exec.c:3279 +> +#5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address 0x7fce7dd97028 out +> +of bounds>, len=2) at /usr/src/debug/qemu-4.0/exec.c:3318 +> +#6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) at +> +/usr/src/debug/qemu-4.0/exec.c:3408 +> +#7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +addr=<optimized out>, attrs=..., attrs@entry=..., +> +buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +len=<optimized out>, is_write=<optimized out>) at +> +/usr/src/debug/qemu-4.0/exec.c:3419 +> +#8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) at +> +/usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +#9 0x000056121682255e in qemu_kvm_cpu_thread_fn +> +(arg=arg@entry=0x56121849aa00) at /usr/src/debug/qemu-4.0/cpus.c:1281 +> +#10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +> +/usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +#11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +> +#12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> +And I searched and found +> +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the same +> +backtrace as above, and it seems commit 7bfde688fb1b ("virtio-blk: Add +> +blk_drain() to virtio_blk_device_unrealize()") is to fix this particular +> +bug. +> +> +But I can still hit the bug even after applying the commit. Do I miss +> +anything? 
+Hi Eryu, +This backtrace seems to be caused by this bug (there were two bugs in +1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +Although the solution hasn't been tested on virtio-blk yet, you may +want to apply this patch: +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +Let me know if this works. + +Best regards, Julia Suvorova. + +On Tue, Jan 07, 2020 at 03:01:01PM +0100, Julia Suvorova wrote: +> +On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> +> +> On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> > On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > > Eryu Guan <address@hidden> wrote: +> +> > > +> +> > > > Hi, +> +> > > > +> +> > > > I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +> +> > > > think it's because io completion hits use-after-free when device is +> +> > > > already gone. Is this a known bug that has been fixed? (I went through +> +> > > > the git log but didn't find anything obvious). +> +> > > > +> +> > > > gdb backtrace is: +> +> > > > +> +> > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > > Program terminated with signal 11, Segmentation fault. 
+> +> > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > 903 return obj->class; +> +> > > > (gdb) bt +> +> > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > #1 0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +> > > > vector=<optimized out>) at +> +> > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > > #2 0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +> > > > opaque=0x558a2f2fd420, ret=0) +> +> > > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > > #3 0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> > > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +> > > > i1=<optimized out>) at +> +> > > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > > #6 0x00007fff9ed75780 in ?? () +> +> > > > #7 0x0000000000000000 in ?? () +> +> > > > +> +> > > > It seems like qemu was completing a discard/write_zero request, but +> +> > > > parent BusState was already freed & set to NULL. +> +> > > > +> +> > > > Do we need to drain all pending request before unrealizing virtio-blk +> +> > > > device? Like the following patch proposed? +> +> > > > +> +> > > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > > +> +> > > > If more info is needed, please let me know. +> +> > > +> +> > > may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> > +> +> > Yeah, this looks promising! I'll try it out (though it's a one-time +> +> > crash for me). Thanks! 
+> +> +> +> After applying this patch, I don't see the original segfaut and +> +> backtrace, but I see this crash +> +> +> +> [Thread debugging using libthread_db enabled] +> +> Using host libthread_db library "/lib64/libthread_db.so.1". +> +> Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +> Program terminated with signal 11, Segmentation fault. +> +> #0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +> addr=0, val=<optimized out>, size=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> 1324 VirtIOPCIProxy *proxy = +> +> VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +> Missing separate debuginfos, use: debuginfo-install +> +> glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +> libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +> libstdc++-4.8.5-28.alios7.1.x86_64 numactl-libs-2.0.9-5.1.alios7.x86_64 +> +> pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +> +> (gdb) bt +> +> #0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +> addr=0, val=<optimized out>, size=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> #1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized out>, +> +> addr=<optimized out>, value=<optimized out>, size=<optimized out>, +> +> shift=<optimized out>, mask=<optimized out>, attrs=...) at +> +> /usr/src/debug/qemu-4.0/memory.c:502 +> +> #2 0x0000561216833c5d in access_with_adjusted_size (addr=addr@entry=0, +> +> value=value@entry=0x7fcdeab1b8a8, size=size@entry=2, +> +> access_size_min=<optimized out>, access_size_max=<optimized out>, +> +> access_fn=0x561216835ac0 <memory_region_write_accessor>, mr=0x56121846d340, +> +> attrs=...) 
+> +> at /usr/src/debug/qemu-4.0/memory.c:568 +> +> #3 0x0000561216837c66 in memory_region_dispatch_write +> +> (mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +> attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +> +> #4 0x00005612167e036f in flatview_write_continue +> +> (fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, attrs=..., +> +> buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> len=len@entry=2, addr1=<optimized out>, l=<optimized out>, +> +> mr=0x56121846d340) +> +> at /usr/src/debug/qemu-4.0/exec.c:3279 +> +> #5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +> addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address 0x7fce7dd97028 +> +> out of bounds>, len=2) at /usr/src/debug/qemu-4.0/exec.c:3318 +> +> #6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +> addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) +> +> at /usr/src/debug/qemu-4.0/exec.c:3408 +> +> #7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +> addr=<optimized out>, attrs=..., attrs@entry=..., +> +> buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> len=<optimized out>, is_write=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/exec.c:3419 +> +> #8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) at +> +> /usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +> #9 0x000056121682255e in qemu_kvm_cpu_thread_fn +> +> (arg=arg@entry=0x56121849aa00) at /usr/src/debug/qemu-4.0/cpus.c:1281 +> +> #10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +> #11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +> +> #12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> +> +> And I searched and found +> +> +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the same +> +> backtrace as above, and it seems commit 
7bfde688fb1b ("virtio-blk: Add +> +> blk_drain() to virtio_blk_device_unrealize()") is to fix this particular +> +> bug. +> +> +> +> But I can still hit the bug even after applying the commit. Do I miss +> +> anything? +> +> +Hi Eryu, +> +This backtrace seems to be caused by this bug (there were two bugs in +> +1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +> +Although the solution hasn't been tested on virtio-blk yet, you may +> +want to apply this patch: +> +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +> +Let me know if this works. +Will try it out, thanks a lot! + +Eryu + +On Tue, Jan 07, 2020 at 03:01:01PM +0100, Julia Suvorova wrote: +> +On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> +> +> On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> > On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > > Eryu Guan <address@hidden> wrote: +> +> > > +> +> > > > Hi, +> +> > > > +> +> > > > I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, I +> +> > > > think it's because io completion hits use-after-free when device is +> +> > > > already gone. Is this a known bug that has been fixed? (I went through +> +> > > > the git log but didn't find anything obvious). +> +> > > > +> +> > > > gdb backtrace is: +> +> > > > +> +> > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > > Program terminated with signal 11, Segmentation fault. 
+> +> > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > 903 return obj->class; +> +> > > > (gdb) bt +> +> > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > #1 0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +> > > > vector=<optimized out>) at +> +> > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > > #2 0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +> > > > opaque=0x558a2f2fd420, ret=0) +> +> > > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > > #3 0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> > > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +> > > > i1=<optimized out>) at +> +> > > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > > #6 0x00007fff9ed75780 in ?? () +> +> > > > #7 0x0000000000000000 in ?? () +> +> > > > +> +> > > > It seems like qemu was completing a discard/write_zero request, but +> +> > > > parent BusState was already freed & set to NULL. +> +> > > > +> +> > > > Do we need to drain all pending request before unrealizing virtio-blk +> +> > > > device? Like the following patch proposed? +> +> > > > +> +> > > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > > +> +> > > > If more info is needed, please let me know. +> +> > > +> +> > > may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> > +> +> > Yeah, this looks promising! I'll try it out (though it's a one-time +> +> > crash for me). Thanks! 
+> +> +> +> After applying this patch, I don't see the original segfaut and +> +> backtrace, but I see this crash +> +> +> +> [Thread debugging using libthread_db enabled] +> +> Using host libthread_db library "/lib64/libthread_db.so.1". +> +> Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +> Program terminated with signal 11, Segmentation fault. +> +> #0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +> addr=0, val=<optimized out>, size=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> 1324 VirtIOPCIProxy *proxy = +> +> VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +> Missing separate debuginfos, use: debuginfo-install +> +> glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +> libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +> libstdc++-4.8.5-28.alios7.1.x86_64 numactl-libs-2.0.9-5.1.alios7.x86_64 +> +> pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +> +> (gdb) bt +> +> #0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +> addr=0, val=<optimized out>, size=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> #1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized out>, +> +> addr=<optimized out>, value=<optimized out>, size=<optimized out>, +> +> shift=<optimized out>, mask=<optimized out>, attrs=...) at +> +> /usr/src/debug/qemu-4.0/memory.c:502 +> +> #2 0x0000561216833c5d in access_with_adjusted_size (addr=addr@entry=0, +> +> value=value@entry=0x7fcdeab1b8a8, size=size@entry=2, +> +> access_size_min=<optimized out>, access_size_max=<optimized out>, +> +> access_fn=0x561216835ac0 <memory_region_write_accessor>, mr=0x56121846d340, +> +> attrs=...) 
+> +> at /usr/src/debug/qemu-4.0/memory.c:568 +> +> #3 0x0000561216837c66 in memory_region_dispatch_write +> +> (mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +> attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +> +> #4 0x00005612167e036f in flatview_write_continue +> +> (fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, attrs=..., +> +> buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> len=len@entry=2, addr1=<optimized out>, l=<optimized out>, +> +> mr=0x56121846d340) +> +> at /usr/src/debug/qemu-4.0/exec.c:3279 +> +> #5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +> addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address 0x7fce7dd97028 +> +> out of bounds>, len=2) at /usr/src/debug/qemu-4.0/exec.c:3318 +> +> #6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +> addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) +> +> at /usr/src/debug/qemu-4.0/exec.c:3408 +> +> #7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +> addr=<optimized out>, attrs=..., attrs@entry=..., +> +> buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> len=<optimized out>, is_write=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/exec.c:3419 +> +> #8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) at +> +> /usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +> #9 0x000056121682255e in qemu_kvm_cpu_thread_fn +> +> (arg=arg@entry=0x56121849aa00) at /usr/src/debug/qemu-4.0/cpus.c:1281 +> +> #10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +> +> /usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +> #11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +> +> #12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> +> +> And I searched and found +> +> +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the same +> +> backtrace as above, and it seems commit 
7bfde688fb1b ("virtio-blk: Add +> +> blk_drain() to virtio_blk_device_unrealize()") is to fix this particular +> +> bug. +> +> +> +> But I can still hit the bug even after applying the commit. Do I miss +> +> anything? +> +> +Hi Eryu, +> +This backtrace seems to be caused by this bug (there were two bugs in +> +1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +> +Although the solution hasn't been tested on virtio-blk yet, you may +> +want to apply this patch: +> +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +> +Let me know if this works. +Unfortunately, I still see the same segfault & backtrace after applying +commit 421afd2fe8dd ("virtio: reset region cache when on queue +deletion") + +Anything I can help to debug? + +Thanks, +Eryu + +On Thu, Jan 09, 2020 at 12:58:06PM +0800, Eryu Guan wrote: +> +On Tue, Jan 07, 2020 at 03:01:01PM +0100, Julia Suvorova wrote: +> +> On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> > +> +> > On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> > > On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > > > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > > > Eryu Guan <address@hidden> wrote: +> +> > > > +> +> > > > > Hi, +> +> > > > > +> +> > > > > I'm using qemu 4.0 and hit segfault when tearing down kata sandbox, +> +> > > > > I +> +> > > > > think it's because io completion hits use-after-free when device is +> +> > > > > already gone. Is this a known bug that has been fixed? (I went +> +> > > > > through +> +> > > > > the git log but didn't find anything obvious). +> +> > > > > +> +> > > > > gdb backtrace is: +> +> > > > > +> +> > > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > > > Program terminated with signal 11, Segmentation fault. 
+> +> > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > 903 return obj->class; +> +> > > > > (gdb) bt +> +> > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > #1 0x0000558a2c009e9b in virtio_notify_vector (vdev=0x558a2e7751d0, +> +> > > > > vector=<optimized out>) at +> +> > > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > > > #2 0x0000558a2bfdcb1e in virtio_blk_discard_write_zeroes_complete ( +> +> > > > > opaque=0x558a2f2fd420, ret=0) +> +> > > > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > > > #3 0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> > > > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized out>, +> +> > > > > i1=<optimized out>) at +> +> > > > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > > > #6 0x00007fff9ed75780 in ?? () +> +> > > > > #7 0x0000000000000000 in ?? () +> +> > > > > +> +> > > > > It seems like qemu was completing a discard/write_zero request, but +> +> > > > > parent BusState was already freed & set to NULL. +> +> > > > > +> +> > > > > Do we need to drain all pending request before unrealizing +> +> > > > > virtio-blk +> +> > > > > device? Like the following patch proposed? +> +> > > > > +> +> > > > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > > > +> +> > > > > If more info is needed, please let me know. +> +> > > > +> +> > > > may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> > > +> +> > > Yeah, this looks promising! I'll try it out (though it's a one-time +> +> > > crash for me). Thanks! 
+> +> > +> +> > After applying this patch, I don't see the original segfaut and +> +> > backtrace, but I see this crash +> +> > +> +> > [Thread debugging using libthread_db enabled] +> +> > Using host libthread_db library "/lib64/libthread_db.so.1". +> +> > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +> > Program terminated with signal 11, Segmentation fault. +> +> > #0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +> > addr=0, val=<optimized out>, size=<optimized out>) at +> +> > /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > 1324 VirtIOPCIProxy *proxy = +> +> > VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +> > Missing separate debuginfos, use: debuginfo-install +> +> > glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +> > libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +> > libstdc++-4.8.5-28.alios7.1.x86_64 numactl-libs-2.0.9-5.1.alios7.x86_64 +> +> > pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +> +> > (gdb) bt +> +> > #0 0x0000561216a57609 in virtio_pci_notify_write (opaque=0x5612184747e0, +> +> > addr=0, val=<optimized out>, size=<optimized out>) at +> +> > /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > #1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized +> +> > out>, addr=<optimized out>, value=<optimized out>, size=<optimized out>, +> +> > shift=<optimized out>, mask=<optimized out>, attrs=...) at +> +> > /usr/src/debug/qemu-4.0/memory.c:502 +> +> > #2 0x0000561216833c5d in access_with_adjusted_size (addr=addr@entry=0, +> +> > value=value@entry=0x7fcdeab1b8a8, size=size@entry=2, +> +> > access_size_min=<optimized out>, access_size_max=<optimized out>, +> +> > access_fn=0x561216835ac0 <memory_region_write_accessor>, +> +> > mr=0x56121846d340, attrs=...) 
+> +> > at /usr/src/debug/qemu-4.0/memory.c:568 +> +> > #3 0x0000561216837c66 in memory_region_dispatch_write +> +> > (mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +> > attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +> +> > #4 0x00005612167e036f in flatview_write_continue +> +> > (fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, attrs=..., +> +> > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> > len=len@entry=2, addr1=<optimized out>, l=<optimized out>, +> +> > mr=0x56121846d340) +> +> > at /usr/src/debug/qemu-4.0/exec.c:3279 +> +> > #5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +> > addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address 0x7fce7dd97028 +> +> > out of bounds>, len=2) at /usr/src/debug/qemu-4.0/exec.c:3318 +> +> > #6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +> > addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized +> +> > out>) at /usr/src/debug/qemu-4.0/exec.c:3408 +> +> > #7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +> > addr=<optimized out>, attrs=..., attrs@entry=..., +> +> > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> > len=<optimized out>, is_write=<optimized out>) at +> +> > /usr/src/debug/qemu-4.0/exec.c:3419 +> +> > #8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) at +> +> > /usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +> > #9 0x000056121682255e in qemu_kvm_cpu_thread_fn +> +> > (arg=arg@entry=0x56121849aa00) at /usr/src/debug/qemu-4.0/cpus.c:1281 +> +> > #10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +> +> > /usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +> > #11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +> +> > #12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> > +> +> > And I searched and found +> +> > +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, 
which has the same +> +> > backtrace as above, and it seems commit 7bfde688fb1b ("virtio-blk: Add +> +> > blk_drain() to virtio_blk_device_unrealize()") is to fix this particular +> +> > bug. +> +> > +> +> > But I can still hit the bug even after applying the commit. Do I miss +> +> > anything? +> +> +> +> Hi Eryu, +> +> This backtrace seems to be caused by this bug (there were two bugs in +> +> 1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +> +> Although the solution hasn't been tested on virtio-blk yet, you may +> +> want to apply this patch: +> +> +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +> +> Let me know if this works. +> +> +Unfortunately, I still see the same segfault & backtrace after applying +> +commit 421afd2fe8dd ("virtio: reset region cache when on queue +> +deletion") +> +> +Anything I can help to debug? +Please post the QEMU command-line and the QMP commands use to remove the +device. + +The backtrace shows a vcpu thread submitting a request. The device +seems to be partially destroyed. That's surprising because the monitor +and the vcpu thread should use the QEMU global mutex to avoid race +conditions. Maybe seeing the QMP commands will make it clearer... 
+ +Stefan +signature.asc +Description: +PGP signature + +On Mon, Jan 13, 2020 at 04:38:55PM +0000, Stefan Hajnoczi wrote: +> +On Thu, Jan 09, 2020 at 12:58:06PM +0800, Eryu Guan wrote: +> +> On Tue, Jan 07, 2020 at 03:01:01PM +0100, Julia Suvorova wrote: +> +> > On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> > > +> +> > > On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> > > > On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > > > > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > > > > Eryu Guan <address@hidden> wrote: +> +> > > > > +> +> > > > > > Hi, +> +> > > > > > +> +> > > > > > I'm using qemu 4.0 and hit segfault when tearing down kata +> +> > > > > > sandbox, I +> +> > > > > > think it's because io completion hits use-after-free when device +> +> > > > > > is +> +> > > > > > already gone. Is this a known bug that has been fixed? (I went +> +> > > > > > through +> +> > > > > > the git log but didn't find anything obvious). +> +> > > > > > +> +> > > > > > gdb backtrace is: +> +> > > > > > +> +> > > > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > > > > Program terminated with signal 11, Segmentation fault. 
+> +> > > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > > 903 return obj->class; +> +> > > > > > (gdb) bt +> +> > > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > > #1 0x0000558a2c009e9b in virtio_notify_vector +> +> > > > > > (vdev=0x558a2e7751d0, +> +> > > > > > vector=<optimized out>) at +> +> > > > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > > > > #2 0x0000558a2bfdcb1e in +> +> > > > > > virtio_blk_discard_write_zeroes_complete ( +> +> > > > > > opaque=0x558a2f2fd420, ret=0) +> +> > > > > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > > > > #3 0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> > > > > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > > > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized +> +> > > > > > out>, +> +> > > > > > i1=<optimized out>) at +> +> > > > > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > > > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > > > > #6 0x00007fff9ed75780 in ?? () +> +> > > > > > #7 0x0000000000000000 in ?? () +> +> > > > > > +> +> > > > > > It seems like qemu was completing a discard/write_zero request, +> +> > > > > > but +> +> > > > > > parent BusState was already freed & set to NULL. +> +> > > > > > +> +> > > > > > Do we need to drain all pending request before unrealizing +> +> > > > > > virtio-blk +> +> > > > > > device? Like the following patch proposed? +> +> > > > > > +> +> > > > > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > > > > +> +> > > > > > If more info is needed, please let me know. +> +> > > > > +> +> > > > > may be this will help: +https://patchwork.kernel.org/patch/11213047/ +> +> > > > +> +> > > > Yeah, this looks promising! I'll try it out (though it's a one-time +> +> > > > crash for me). Thanks! 
+> +> > > +> +> > > After applying this patch, I don't see the original segfaut and +> +> > > backtrace, but I see this crash +> +> > > +> +> > > [Thread debugging using libthread_db enabled] +> +> > > Using host libthread_db library "/lib64/libthread_db.so.1". +> +> > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +> > > Program terminated with signal 11, Segmentation fault. +> +> > > #0 0x0000561216a57609 in virtio_pci_notify_write +> +> > > (opaque=0x5612184747e0, addr=0, val=<optimized out>, size=<optimized +> +> > > out>) at /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > > 1324 VirtIOPCIProxy *proxy = +> +> > > VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +> > > Missing separate debuginfos, use: debuginfo-install +> +> > > glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +> > > libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +> > > libstdc++-4.8.5-28.alios7.1.x86_64 numactl-libs-2.0.9-5.1.alios7.x86_64 +> +> > > pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +> +> > > (gdb) bt +> +> > > #0 0x0000561216a57609 in virtio_pci_notify_write +> +> > > (opaque=0x5612184747e0, addr=0, val=<optimized out>, size=<optimized +> +> > > out>) at /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > > #1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized +> +> > > out>, addr=<optimized out>, value=<optimized out>, size=<optimized +> +> > > out>, shift=<optimized out>, mask=<optimized out>, attrs=...) at +> +> > > /usr/src/debug/qemu-4.0/memory.c:502 +> +> > > #2 0x0000561216833c5d in access_with_adjusted_size (addr=addr@entry=0, +> +> > > value=value@entry=0x7fcdeab1b8a8, size=size@entry=2, +> +> > > access_size_min=<optimized out>, access_size_max=<optimized out>, +> +> > > access_fn=0x561216835ac0 <memory_region_write_accessor>, +> +> > > mr=0x56121846d340, attrs=...) 
+> +> > > at /usr/src/debug/qemu-4.0/memory.c:568 +> +> > > #3 0x0000561216837c66 in memory_region_dispatch_write +> +> > > (mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +> > > attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +> +> > > #4 0x00005612167e036f in flatview_write_continue +> +> > > (fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, attrs=..., +> +> > > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> > > len=len@entry=2, addr1=<optimized out>, l=<optimized out>, +> +> > > mr=0x56121846d340) +> +> > > at /usr/src/debug/qemu-4.0/exec.c:3279 +> +> > > #5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +> > > addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address +> +> > > 0x7fce7dd97028 out of bounds>, len=2) at +> +> > > /usr/src/debug/qemu-4.0/exec.c:3318 +> +> > > #6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +> > > addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized +> +> > > out>) at /usr/src/debug/qemu-4.0/exec.c:3408 +> +> > > #7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +> > > addr=<optimized out>, attrs=..., attrs@entry=..., +> +> > > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> > > len=<optimized out>, is_write=<optimized out>) at +> +> > > /usr/src/debug/qemu-4.0/exec.c:3419 +> +> > > #8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) +> +> > > at /usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +> > > #9 0x000056121682255e in qemu_kvm_cpu_thread_fn +> +> > > (arg=arg@entry=0x56121849aa00) at /usr/src/debug/qemu-4.0/cpus.c:1281 +> +> > > #10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +> +> > > /usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +> > > #11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +> +> > > #12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> > > +> +> > > And I searched 
and found +> +> > > +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the same +> +> > > backtrace as above, and it seems commit 7bfde688fb1b ("virtio-blk: Add +> +> > > blk_drain() to virtio_blk_device_unrealize()") is to fix this particular +> +> > > bug. +> +> > > +> +> > > But I can still hit the bug even after applying the commit. Do I miss +> +> > > anything? +> +> > +> +> > Hi Eryu, +> +> > This backtrace seems to be caused by this bug (there were two bugs in +> +> > 1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +> +> > Although the solution hasn't been tested on virtio-blk yet, you may +> +> > want to apply this patch: +> +> > +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +> +> > Let me know if this works. +> +> +> +> Unfortunately, I still see the same segfault & backtrace after applying +> +> commit 421afd2fe8dd ("virtio: reset region cache when on queue +> +> deletion") +> +> +> +> Anything I can help to debug? +> +> +Please post the QEMU command-line and the QMP commands use to remove the +> +device. +It's a normal kata instance using virtio-fs as rootfs. 
+ +/usr/local/libexec/qemu-kvm -name +sandbox-a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d \ + -uuid e03f6b6b-b80b-40c0-8d5b-0cbfed1305d2 -machine +q35,accel=kvm,kernel_irqchip,nvdimm,nosmm,nosmbus,nosata,nopit \ + -cpu host -qmp +unix:/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/qmp.sock,server,nowait + \ + -qmp +unix:/run/vc/vm/debug-a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/qmp.sock,server,nowait + \ + -m 2048M,slots=10,maxmem=773893M -device +pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile= \ + -device virtio-serial-pci,disable-modern=false,id=serial0,romfile= -device +virtconsole,chardev=charconsole0,id=console0 \ + -chardev +socket,id=charconsole0,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/console.sock,server,nowait + \ + -device +virtserialport,chardev=metricagent,id=channel10,name=metric.agent.channel.10 \ + -chardev +socket,id=metricagent,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/metric.agent.channel.sock,server,nowait + \ + -device nvdimm,id=nv0,memdev=mem0 -object +memory-backend-file,id=mem0,mem-path=/usr/local/share/containers-image-1.9.0.img,size=268435456 + \ + -object rng-random,id=rng0,filename=/dev/urandom -device +virtio-rng,rng=rng0,romfile= \ + -device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0 \ + -chardev +socket,id=charch0,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/kata.sock,server,nowait + \ + -chardev +socket,id=char-6fca044b801a78a1,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/vhost-fs.sock + \ + -device +vhost-user-fs-pci,chardev=char-6fca044b801a78a1,tag=kataShared,cache-size=8192M +-netdev tap,id=network-0,vhost=on,vhostfds=3,fds=4 \ + -device +driver=virtio-net-pci,netdev=network-0,mac=76:57:f1:ab:51:5c,disable-modern=false,mq=on,vectors=4,romfile= + \ + -global 
kvm-pit.lost_tick_policy=discard -vga none -no-user-config -nodefaults +-nographic -daemonize \ + -object memory-backend-file,id=dimm1,size=2048M,mem-path=/dev/shm,share=on +-numa node,memdev=dimm1 -kernel /usr/local/share/kernel \ + -append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 +i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k console=hvc0 +console=hvc1 iommu=off cryptomgr.notests net.ifnames=0 pci=lastbus=0 +root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro ro +rootfstype=ext4 quiet systemd.show_status=false panic=1 nr_cpus=96 +agent.use_vsock=false init=/usr/lib/systemd/systemd +systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service +systemd.mask=systemd-networkd.socket \ + -pidfile +/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/pid +\ + -smp 1,cores=1,threads=1,sockets=96,maxcpus=96 + +QMP command to delete device (the device id is just an example, not the +one caused the crash): + +"{\"arguments\":{\"id\":\"virtio-drive-5967abfb917c8da6\"},\"execute\":\"device_del\"}" + +which has been hot plugged by: +"{\"arguments\":{\"cache\":{\"direct\":true,\"no-flush\":false},\"driver\":\"raw\",\"file\":{\"driver\":\"file\",\"filename\":\"/dev/dm-18\"},\"node-name\":\"drive-5967abfb917c8da6\"},\"execute\":\"blockdev-add\"}" +"{\"return\": {}}" +"{\"arguments\":{\"addr\":\"01\",\"bus\":\"pci-bridge-0\",\"drive\":\"drive-5967abfb917c8da6\",\"driver\":\"virtio-blk-pci\",\"id\":\"virtio-drive-5967abfb917c8da6\",\"romfile\":\"\",\"share-rw\":\"on\"},\"execute\":\"device_add\"}" +"{\"return\": {}}" + +> +> +The backtrace shows a vcpu thread submitting a request. The device +> +seems to be partially destroyed. That's surprising because the monitor +> +and the vcpu thread should use the QEMU global mutex to avoid race +> +conditions. Maybe seeing the QMP commands will make it clearer... +> +> +Stefan +Thanks! 
+ +Eryu + +On Tue, Jan 14, 2020 at 10:50:58AM +0800, Eryu Guan wrote: +> +On Mon, Jan 13, 2020 at 04:38:55PM +0000, Stefan Hajnoczi wrote: +> +> On Thu, Jan 09, 2020 at 12:58:06PM +0800, Eryu Guan wrote: +> +> > On Tue, Jan 07, 2020 at 03:01:01PM +0100, Julia Suvorova wrote: +> +> > > On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> > > > +> +> > > > On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> > > > > On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > > > > > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > > > > > Eryu Guan <address@hidden> wrote: +> +> > > > > > +> +> > > > > > > Hi, +> +> > > > > > > +> +> > > > > > > I'm using qemu 4.0 and hit segfault when tearing down kata +> +> > > > > > > sandbox, I +> +> > > > > > > think it's because io completion hits use-after-free when +> +> > > > > > > device is +> +> > > > > > > already gone. Is this a known bug that has been fixed? (I went +> +> > > > > > > through +> +> > > > > > > the git log but didn't find anything obvious). +> +> > > > > > > +> +> > > > > > > gdb backtrace is: +> +> > > > > > > +> +> > > > > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > > > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > > > > > Program terminated with signal 11, Segmentation fault. 
+> +> > > > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > > > 903 return obj->class; +> +> > > > > > > (gdb) bt +> +> > > > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > > > #1 0x0000558a2c009e9b in virtio_notify_vector +> +> > > > > > > (vdev=0x558a2e7751d0, +> +> > > > > > > vector=<optimized out>) at +> +> > > > > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > > > > > #2 0x0000558a2bfdcb1e in +> +> > > > > > > virtio_blk_discard_write_zeroes_complete ( +> +> > > > > > > opaque=0x558a2f2fd420, ret=0) +> +> > > > > > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > > > > > #3 0x0000558a2c261c7e in blk_aio_complete (acb=0x558a2eed7420) +> +> > > > > > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > > > > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized +> +> > > > > > > out>, +> +> > > > > > > i1=<optimized out>) at +> +> > > > > > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > > > > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > > > > > #6 0x00007fff9ed75780 in ?? () +> +> > > > > > > #7 0x0000000000000000 in ?? () +> +> > > > > > > +> +> > > > > > > It seems like qemu was completing a discard/write_zero request, +> +> > > > > > > but +> +> > > > > > > parent BusState was already freed & set to NULL. +> +> > > > > > > +> +> > > > > > > Do we need to drain all pending request before unrealizing +> +> > > > > > > virtio-blk +> +> > > > > > > device? Like the following patch proposed? +> +> > > > > > > +> +> > > > > > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > > > > > +> +> > > > > > > If more info is needed, please let me know. 
+> +> > > > > > +> +> > > > > > may be this will help: +> +> > > > > > +https://patchwork.kernel.org/patch/11213047/ +> +> > > > > +> +> > > > > Yeah, this looks promising! I'll try it out (though it's a one-time +> +> > > > > crash for me). Thanks! +> +> > > > +> +> > > > After applying this patch, I don't see the original segfaut and +> +> > > > backtrace, but I see this crash +> +> > > > +> +> > > > [Thread debugging using libthread_db enabled] +> +> > > > Using host libthread_db library "/lib64/libthread_db.so.1". +> +> > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +> > > > Program terminated with signal 11, Segmentation fault. +> +> > > > #0 0x0000561216a57609 in virtio_pci_notify_write +> +> > > > (opaque=0x5612184747e0, addr=0, val=<optimized out>, size=<optimized +> +> > > > out>) at /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > > > 1324 VirtIOPCIProxy *proxy = +> +> > > > VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +> > > > Missing separate debuginfos, use: debuginfo-install +> +> > > > glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +> > > > libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +> > > > libstdc++-4.8.5-28.alios7.1.x86_64 +> +> > > > numactl-libs-2.0.9-5.1.alios7.x86_64 pixman-0.32.6-3.1.alios7.x86_64 +> +> > > > zlib-1.2.7-16.2.alios7.x86_64 +> +> > > > (gdb) bt +> +> > > > #0 0x0000561216a57609 in virtio_pci_notify_write +> +> > > > (opaque=0x5612184747e0, addr=0, val=<optimized out>, size=<optimized +> +> > > > out>) at /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > > > #1 0x0000561216835b22 in memory_region_write_accessor (mr=<optimized +> +> > > > out>, addr=<optimized out>, value=<optimized out>, size=<optimized +> +> > > > out>, shift=<optimized out>, mask=<optimized out>, attrs=...) 
at +> +> > > > /usr/src/debug/qemu-4.0/memory.c:502 +> +> > > > #2 0x0000561216833c5d in access_with_adjusted_size +> +> > > > (addr=addr@entry=0, value=value@entry=0x7fcdeab1b8a8, +> +> > > > size=size@entry=2, access_size_min=<optimized out>, +> +> > > > access_size_max=<optimized out>, access_fn=0x561216835ac0 +> +> > > > <memory_region_write_accessor>, mr=0x56121846d340, attrs=...) +> +> > > > at /usr/src/debug/qemu-4.0/memory.c:568 +> +> > > > #3 0x0000561216837c66 in memory_region_dispatch_write +> +> > > > (mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +> > > > attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +> +> > > > #4 0x00005612167e036f in flatview_write_continue +> +> > > > (fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, attrs=..., +> +> > > > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> > > > len=len@entry=2, addr1=<optimized out>, l=<optimized out>, +> +> > > > mr=0x56121846d340) +> +> > > > at /usr/src/debug/qemu-4.0/exec.c:3279 +> +> > > > #5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +> > > > addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address +> +> > > > 0x7fce7dd97028 out of bounds>, len=2) at +> +> > > > /usr/src/debug/qemu-4.0/exec.c:3318 +> +> > > > #6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +> > > > addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized +> +> > > > out>) at /usr/src/debug/qemu-4.0/exec.c:3408 +> +> > > > #7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +> > > > addr=<optimized out>, attrs=..., attrs@entry=..., +> +> > > > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of bounds>, +> +> > > > len=<optimized out>, is_write=<optimized out>) at +> +> > > > /usr/src/debug/qemu-4.0/exec.c:3419 +> +> > > > #8 0x0000561216849da1 in kvm_cpu_exec (cpu=cpu@entry=0x56121849aa00) +> +> > > > at /usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +> > > > #9 
0x000056121682255e in qemu_kvm_cpu_thread_fn +> +> > > > (arg=arg@entry=0x56121849aa00) at /usr/src/debug/qemu-4.0/cpus.c:1281 +> +> > > > #10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) at +> +> > > > /usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +> > > > #11 0x00007fce7bef6e25 in start_thread () from /lib64/libpthread.so.0 +> +> > > > #12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> > > > +> +> > > > And I searched and found +> +> > > > +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the +> +> > > > same +> +> > > > backtrace as above, and it seems commit 7bfde688fb1b ("virtio-blk: Add +> +> > > > blk_drain() to virtio_blk_device_unrealize()") is to fix this +> +> > > > particular +> +> > > > bug. +> +> > > > +> +> > > > But I can still hit the bug even after applying the commit. Do I miss +> +> > > > anything? +> +> > > +> +> > > Hi Eryu, +> +> > > This backtrace seems to be caused by this bug (there were two bugs in +> +> > > 1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +> +> > > Although the solution hasn't been tested on virtio-blk yet, you may +> +> > > want to apply this patch: +> +> > > +> +> > > +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +> +> > > Let me know if this works. +> +> > +> +> > Unfortunately, I still see the same segfault & backtrace after applying +> +> > commit 421afd2fe8dd ("virtio: reset region cache when on queue +> +> > deletion") +> +> > +> +> > Anything I can help to debug? +> +> +> +> Please post the QEMU command-line and the QMP commands use to remove the +> +> device. +> +> +It's a normal kata instance using virtio-fs as rootfs. 
+> +> +/usr/local/libexec/qemu-kvm -name +> +sandbox-a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d \ +> +-uuid e03f6b6b-b80b-40c0-8d5b-0cbfed1305d2 -machine +> +q35,accel=kvm,kernel_irqchip,nvdimm,nosmm,nosmbus,nosata,nopit \ +> +-cpu host -qmp +> +unix:/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/qmp.sock,server,nowait +> +\ +> +-qmp +> +unix:/run/vc/vm/debug-a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/qmp.sock,server,nowait +> +\ +> +-m 2048M,slots=10,maxmem=773893M -device +> +pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile= \ +> +-device virtio-serial-pci,disable-modern=false,id=serial0,romfile= -device +> +virtconsole,chardev=charconsole0,id=console0 \ +> +-chardev +> +socket,id=charconsole0,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/console.sock,server,nowait +> +\ +> +-device +> +virtserialport,chardev=metricagent,id=channel10,name=metric.agent.channel.10 \ +> +-chardev +> +socket,id=metricagent,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/metric.agent.channel.sock,server,nowait +> +\ +> +-device nvdimm,id=nv0,memdev=mem0 -object +> +memory-backend-file,id=mem0,mem-path=/usr/local/share/containers-image-1.9.0.img,size=268435456 +> +\ +> +-object rng-random,id=rng0,filename=/dev/urandom -device +> +virtio-rng,rng=rng0,romfile= \ +> +-device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0 \ +> +-chardev +> +socket,id=charch0,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/kata.sock,server,nowait +> +\ +> +-chardev +> +socket,id=char-6fca044b801a78a1,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/vhost-fs.sock +> +\ +> +-device +> +vhost-user-fs-pci,chardev=char-6fca044b801a78a1,tag=kataShared,cache-size=8192M +> +-netdev tap,id=network-0,vhost=on,vhostfds=3,fds=4 \ +> +-device +> 
+driver=virtio-net-pci,netdev=network-0,mac=76:57:f1:ab:51:5c,disable-modern=false,mq=on,vectors=4,romfile= +> +\ +> +-global kvm-pit.lost_tick_policy=discard -vga none -no-user-config +> +-nodefaults -nographic -daemonize \ +> +-object memory-backend-file,id=dimm1,size=2048M,mem-path=/dev/shm,share=on +> +-numa node,memdev=dimm1 -kernel /usr/local/share/kernel \ +> +-append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 +> +i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k +> +console=hvc0 console=hvc1 iommu=off cryptomgr.notests net.ifnames=0 +> +pci=lastbus=0 root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro +> +ro rootfstype=ext4 quiet systemd.show_status=false panic=1 nr_cpus=96 +> +agent.use_vsock=false init=/usr/lib/systemd/systemd +> +systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service +> +systemd.mask=systemd-networkd.socket \ +> +-pidfile +> +/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/pid +> +\ +> +-smp 1,cores=1,threads=1,sockets=96,maxcpus=96 +> +> +QMP command to delete device (the device id is just an example, not the +> +one caused the crash): +> +> +"{\"arguments\":{\"id\":\"virtio-drive-5967abfb917c8da6\"},\"execute\":\"device_del\"}" +> +> +which has been hot plugged by: +> +"{\"arguments\":{\"cache\":{\"direct\":true,\"no-flush\":false},\"driver\":\"raw\",\"file\":{\"driver\":\"file\",\"filename\":\"/dev/dm-18\"},\"node-name\":\"drive-5967abfb917c8da6\"},\"execute\":\"blockdev-add\"}" +> +"{\"return\": {}}" +> +"{\"arguments\":{\"addr\":\"01\",\"bus\":\"pci-bridge-0\",\"drive\":\"drive-5967abfb917c8da6\",\"driver\":\"virtio-blk-pci\",\"id\":\"virtio-drive-5967abfb917c8da6\",\"romfile\":\"\",\"share-rw\":\"on\"},\"execute\":\"device_add\"}" +> +"{\"return\": {}}" +Thanks. I wasn't able to reproduce this crash with qemu.git/master. 
+ +One thing that is strange about the latest backtrace you posted: QEMU is +dispatching the memory access instead of using the ioeventfd code that +that virtio-blk-pci normally takes when a virtqueue is notified. I +guess this means ioeventfd has already been disabled due to the hot +unplug. + +Could you try with machine type "i440fx" instead of "q35"? I wonder if +pci-bridge/shpc is part of the problem. + +Stefan +signature.asc +Description: +PGP signature + +On Tue, Jan 14, 2020 at 04:16:24PM +0000, Stefan Hajnoczi wrote: +> +On Tue, Jan 14, 2020 at 10:50:58AM +0800, Eryu Guan wrote: +> +> On Mon, Jan 13, 2020 at 04:38:55PM +0000, Stefan Hajnoczi wrote: +> +> > On Thu, Jan 09, 2020 at 12:58:06PM +0800, Eryu Guan wrote: +> +> > > On Tue, Jan 07, 2020 at 03:01:01PM +0100, Julia Suvorova wrote: +> +> > > > On Tue, Jan 7, 2020 at 2:06 PM Eryu Guan <address@hidden> wrote: +> +> > > > > +> +> > > > > On Thu, Jan 02, 2020 at 10:08:50AM +0800, Eryu Guan wrote: +> +> > > > > > On Tue, Dec 31, 2019 at 11:51:35AM +0100, Igor Mammedov wrote: +> +> > > > > > > On Tue, 31 Dec 2019 18:34:34 +0800 +> +> > > > > > > Eryu Guan <address@hidden> wrote: +> +> > > > > > > +> +> > > > > > > > Hi, +> +> > > > > > > > +> +> > > > > > > > I'm using qemu 4.0 and hit segfault when tearing down kata +> +> > > > > > > > sandbox, I +> +> > > > > > > > think it's because io completion hits use-after-free when +> +> > > > > > > > device is +> +> > > > > > > > already gone. Is this a known bug that has been fixed? (I +> +> > > > > > > > went through +> +> > > > > > > > the git log but didn't find anything obvious). +> +> > > > > > > > +> +> > > > > > > > gdb backtrace is: +> +> > > > > > > > +> +> > > > > > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > > > > > sandbox-5b8df8c6c6901c3c0a9b02879be10fe8d69d6'. +> +> > > > > > > > Program terminated with signal 11, Segmentation fault. 
+> +> > > > > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > > > > 903 return obj->class; +> +> > > > > > > > (gdb) bt +> +> > > > > > > > #0 object_get_class (obj=obj@entry=0x0) at +> +> > > > > > > > /usr/src/debug/qemu-4.0/qom/object.c:903 +> +> > > > > > > > #1 0x0000558a2c009e9b in virtio_notify_vector +> +> > > > > > > > (vdev=0x558a2e7751d0, +> +> > > > > > > > vector=<optimized out>) at +> +> > > > > > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio.c:1118 +> +> > > > > > > > #2 0x0000558a2bfdcb1e in +> +> > > > > > > > virtio_blk_discard_write_zeroes_complete ( +> +> > > > > > > > opaque=0x558a2f2fd420, ret=0) +> +> > > > > > > > at /usr/src/debug/qemu-4.0/hw/block/virtio-blk.c:186 +> +> > > > > > > > #3 0x0000558a2c261c7e in blk_aio_complete +> +> > > > > > > > (acb=0x558a2eed7420) +> +> > > > > > > > at /usr/src/debug/qemu-4.0/block/block-backend.c:1305 +> +> > > > > > > > #4 0x0000558a2c3031db in coroutine_trampoline (i0=<optimized +> +> > > > > > > > out>, +> +> > > > > > > > i1=<optimized out>) at +> +> > > > > > > > /usr/src/debug/qemu-4.0/util/coroutine-ucontext.c:116 +> +> > > > > > > > #5 0x00007f45b2f8b080 in ?? () from /lib64/libc.so.6 +> +> > > > > > > > #6 0x00007fff9ed75780 in ?? () +> +> > > > > > > > #7 0x0000000000000000 in ?? () +> +> > > > > > > > +> +> > > > > > > > It seems like qemu was completing a discard/write_zero +> +> > > > > > > > request, but +> +> > > > > > > > parent BusState was already freed & set to NULL. +> +> > > > > > > > +> +> > > > > > > > Do we need to drain all pending request before unrealizing +> +> > > > > > > > virtio-blk +> +> > > > > > > > device? Like the following patch proposed? +> +> > > > > > > > +> +> > > > > > > > +https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02945.html +> +> > > > > > > > +> +> > > > > > > > If more info is needed, please let me know. 
+> +> > > > > > > +> +> > > > > > > may be this will help: +> +> > > > > > > +https://patchwork.kernel.org/patch/11213047/ +> +> > > > > > +> +> > > > > > Yeah, this looks promising! I'll try it out (though it's a +> +> > > > > > one-time +> +> > > > > > crash for me). Thanks! +> +> > > > > +> +> > > > > After applying this patch, I don't see the original segfaut and +> +> > > > > backtrace, but I see this crash +> +> > > > > +> +> > > > > [Thread debugging using libthread_db enabled] +> +> > > > > Using host libthread_db library "/lib64/libthread_db.so.1". +> +> > > > > Core was generated by `/usr/local/libexec/qemu-kvm -name +> +> > > > > sandbox-a2f34a11a7e1449496503bbc4050ae040c0d3'. +> +> > > > > Program terminated with signal 11, Segmentation fault. +> +> > > > > #0 0x0000561216a57609 in virtio_pci_notify_write +> +> > > > > (opaque=0x5612184747e0, addr=0, val=<optimized out>, +> +> > > > > size=<optimized out>) at +> +> > > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > > > > 1324 VirtIOPCIProxy *proxy = +> +> > > > > VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); +> +> > > > > Missing separate debuginfos, use: debuginfo-install +> +> > > > > glib2-2.42.2-5.1.alios7.x86_64 glibc-2.17-260.alios7.x86_64 +> +> > > > > libgcc-4.8.5-28.alios7.1.x86_64 libseccomp-2.3.1-3.alios7.x86_64 +> +> > > > > libstdc++-4.8.5-28.alios7.1.x86_64 +> +> > > > > numactl-libs-2.0.9-5.1.alios7.x86_64 +> +> > > > > pixman-0.32.6-3.1.alios7.x86_64 zlib-1.2.7-16.2.alios7.x86_64 +> +> > > > > (gdb) bt +> +> > > > > #0 0x0000561216a57609 in virtio_pci_notify_write +> +> > > > > (opaque=0x5612184747e0, addr=0, val=<optimized out>, +> +> > > > > size=<optimized out>) at +> +> > > > > /usr/src/debug/qemu-4.0/hw/virtio/virtio-pci.c:1324 +> +> > > > > #1 0x0000561216835b22 in memory_region_write_accessor +> +> > > > > (mr=<optimized out>, addr=<optimized out>, value=<optimized out>, +> +> > > > > size=<optimized out>, shift=<optimized out>, mask=<optimized out>, +> +> > > > > 
attrs=...) at /usr/src/debug/qemu-4.0/memory.c:502 +> +> > > > > #2 0x0000561216833c5d in access_with_adjusted_size +> +> > > > > (addr=addr@entry=0, value=value@entry=0x7fcdeab1b8a8, +> +> > > > > size=size@entry=2, access_size_min=<optimized out>, +> +> > > > > access_size_max=<optimized out>, access_fn=0x561216835ac0 +> +> > > > > <memory_region_write_accessor>, mr=0x56121846d340, attrs=...) +> +> > > > > at /usr/src/debug/qemu-4.0/memory.c:568 +> +> > > > > #3 0x0000561216837c66 in memory_region_dispatch_write +> +> > > > > (mr=mr@entry=0x56121846d340, addr=0, data=<optimized out>, size=2, +> +> > > > > attrs=attrs@entry=...) at /usr/src/debug/qemu-4.0/memory.c:1503 +> +> > > > > #4 0x00005612167e036f in flatview_write_continue +> +> > > > > (fv=fv@entry=0x56121852edd0, addr=addr@entry=841813602304, +> +> > > > > attrs=..., buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out +> +> > > > > of bounds>, len=len@entry=2, addr1=<optimized out>, l=<optimized +> +> > > > > out>, mr=0x56121846d340) +> +> > > > > at /usr/src/debug/qemu-4.0/exec.c:3279 +> +> > > > > #5 0x00005612167e0506 in flatview_write (fv=0x56121852edd0, +> +> > > > > addr=841813602304, attrs=..., buf=0x7fce7dd97028 <Address +> +> > > > > 0x7fce7dd97028 out of bounds>, len=2) at +> +> > > > > /usr/src/debug/qemu-4.0/exec.c:3318 +> +> > > > > #6 0x00005612167e4a1b in address_space_write (as=<optimized out>, +> +> > > > > addr=<optimized out>, attrs=..., buf=<optimized out>, +> +> > > > > len=<optimized out>) at /usr/src/debug/qemu-4.0/exec.c:3408 +> +> > > > > #7 0x00005612167e4aa5 in address_space_rw (as=<optimized out>, +> +> > > > > addr=<optimized out>, attrs=..., attrs@entry=..., +> +> > > > > buf=buf@entry=0x7fce7dd97028 <Address 0x7fce7dd97028 out of +> +> > > > > bounds>, len=<optimized out>, is_write=<optimized out>) at +> +> > > > > /usr/src/debug/qemu-4.0/exec.c:3419 +> +> > > > > #8 0x0000561216849da1 in kvm_cpu_exec +> +> > > > > (cpu=cpu@entry=0x56121849aa00) at +> +> > > > > 
/usr/src/debug/qemu-4.0/accel/kvm/kvm-all.c:2034 +> +> > > > > #9 0x000056121682255e in qemu_kvm_cpu_thread_fn +> +> > > > > (arg=arg@entry=0x56121849aa00) at +> +> > > > > /usr/src/debug/qemu-4.0/cpus.c:1281 +> +> > > > > #10 0x0000561216b794d6 in qemu_thread_start (args=<optimized out>) +> +> > > > > at /usr/src/debug/qemu-4.0/util/qemu-thread-posix.c:502 +> +> > > > > #11 0x00007fce7bef6e25 in start_thread () from +> +> > > > > /lib64/libpthread.so.0 +> +> > > > > #12 0x00007fce7bc1ef1d in clone () from /lib64/libc.so.6 +> +> > > > > +> +> > > > > And I searched and found +> +> > > > > +https://bugzilla.redhat.com/show_bug.cgi?id=1706759 +, which has the +> +> > > > > same +> +> > > > > backtrace as above, and it seems commit 7bfde688fb1b ("virtio-blk: +> +> > > > > Add +> +> > > > > blk_drain() to virtio_blk_device_unrealize()") is to fix this +> +> > > > > particular +> +> > > > > bug. +> +> > > > > +> +> > > > > But I can still hit the bug even after applying the commit. Do I +> +> > > > > miss +> +> > > > > anything? +> +> > > > +> +> > > > Hi Eryu, +> +> > > > This backtrace seems to be caused by this bug (there were two bugs in +> +> > > > 1706759): +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +> +> > > > Although the solution hasn't been tested on virtio-blk yet, you may +> +> > > > want to apply this patch: +> +> > > > +> +> > > > +https://lists.nongnu.org/archive/html/qemu-devel/2019-12/msg05197.html +> +> > > > Let me know if this works. +> +> > > +> +> > > Unfortunately, I still see the same segfault & backtrace after applying +> +> > > commit 421afd2fe8dd ("virtio: reset region cache when on queue +> +> > > deletion") +> +> > > +> +> > > Anything I can help to debug? +> +> > +> +> > Please post the QEMU command-line and the QMP commands use to remove the +> +> > device. +> +> +> +> It's a normal kata instance using virtio-fs as rootfs. 
+> +> +> +> /usr/local/libexec/qemu-kvm -name +> +> sandbox-a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d \ +> +> -uuid e03f6b6b-b80b-40c0-8d5b-0cbfed1305d2 -machine +> +> q35,accel=kvm,kernel_irqchip,nvdimm,nosmm,nosmbus,nosata,nopit \ +> +> -cpu host -qmp +> +> unix:/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/qmp.sock,server,nowait +> +> \ +> +> -qmp +> +> unix:/run/vc/vm/debug-a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/qmp.sock,server,nowait +> +> \ +> +> -m 2048M,slots=10,maxmem=773893M -device +> +> pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile= \ +> +> -device virtio-serial-pci,disable-modern=false,id=serial0,romfile= -device +> +> virtconsole,chardev=charconsole0,id=console0 \ +> +> -chardev +> +> socket,id=charconsole0,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/console.sock,server,nowait +> +> \ +> +> -device +> +> virtserialport,chardev=metricagent,id=channel10,name=metric.agent.channel.10 +> +> \ +> +> -chardev +> +> socket,id=metricagent,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/metric.agent.channel.sock,server,nowait +> +> \ +> +> -device nvdimm,id=nv0,memdev=mem0 -object +> +> memory-backend-file,id=mem0,mem-path=/usr/local/share/containers-image-1.9.0.img,size=268435456 +> +> \ +> +> -object rng-random,id=rng0,filename=/dev/urandom -device +> +> virtio-rng,rng=rng0,romfile= \ +> +> -device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0 \ +> +> -chardev +> +> socket,id=charch0,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/kata.sock,server,nowait +> +> \ +> +> -chardev +> +> socket,id=char-6fca044b801a78a1,path=/run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/vhost-fs.sock +> +> \ +> +> -device +> +> vhost-user-fs-pci,chardev=char-6fca044b801a78a1,tag=kataShared,cache-size=8192M +> +> -netdev 
tap,id=network-0,vhost=on,vhostfds=3,fds=4 \ +> +> -device +> +> driver=virtio-net-pci,netdev=network-0,mac=76:57:f1:ab:51:5c,disable-modern=false,mq=on,vectors=4,romfile= +> +> \ +> +> -global kvm-pit.lost_tick_policy=discard -vga none -no-user-config +> +> -nodefaults -nographic -daemonize \ +> +> -object memory-backend-file,id=dimm1,size=2048M,mem-path=/dev/shm,share=on +> +> -numa node,memdev=dimm1 -kernel /usr/local/share/kernel \ +> +> -append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 +> +> i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp +> +> reboot=k console=hvc0 console=hvc1 iommu=off cryptomgr.notests +> +> net.ifnames=0 pci=lastbus=0 root=/dev/pmem0p1 +> +> rootflags=dax,data=ordered,errors=remount-ro ro rootfstype=ext4 quiet +> +> systemd.show_status=false panic=1 nr_cpus=96 agent.use_vsock=false +> +> init=/usr/lib/systemd/systemd systemd.unit=kata-containers.target +> +> systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket \ +> +> -pidfile +> +> /run/vc/vm/a670786fcb1758d2348eb120939d90ffacf9f049f10b337284ad49bbcd60936d/pid +> +> \ +> +> -smp 1,cores=1,threads=1,sockets=96,maxcpus=96 +> +> +> +> QMP command to delete device (the device id is just an example, not the +> +> one caused the crash): +> +> +> +> "{\"arguments\":{\"id\":\"virtio-drive-5967abfb917c8da6\"},\"execute\":\"device_del\"}" +> +> +> +> which has been hot plugged by: +> +> "{\"arguments\":{\"cache\":{\"direct\":true,\"no-flush\":false},\"driver\":\"raw\",\"file\":{\"driver\":\"file\",\"filename\":\"/dev/dm-18\"},\"node-name\":\"drive-5967abfb917c8da6\"},\"execute\":\"blockdev-add\"}" +> +> "{\"return\": {}}" +> +> "{\"arguments\":{\"addr\":\"01\",\"bus\":\"pci-bridge-0\",\"drive\":\"drive-5967abfb917c8da6\",\"driver\":\"virtio-blk-pci\",\"id\":\"virtio-drive-5967abfb917c8da6\",\"romfile\":\"\",\"share-rw\":\"on\"},\"execute\":\"device_add\"}" +> +> "{\"return\": {}}" +> +> +Thanks. 
I wasn't able to reproduce this crash with qemu.git/master. +> +> +One thing that is strange about the latest backtrace you posted: QEMU is +> +dispatching the memory access instead of using the ioeventfd code that +> +that virtio-blk-pci normally takes when a virtqueue is notified. I +> +guess this means ioeventfd has already been disabled due to the hot +> +unplug. +> +> +Could you try with machine type "i440fx" instead of "q35"? I wonder if +> +pci-bridge/shpc is part of the problem. +Sure, will try it. But it may take some time, as the test bed is busy +with other testing tasks. I'll report back once I got the results. + +Thanks, +Eryu + diff --git a/results/classifier/016/debug/88281850 b/results/classifier/016/debug/88281850 new file mode 100644 index 00000000..6b63d098 --- /dev/null +++ b/results/classifier/016/debug/88281850 @@ -0,0 +1,308 @@ +arm: 0.989 +debug: 0.964 +kernel: 0.933 +operating system: 0.912 +performance: 0.526 +boot: 0.290 +hypervisor: 0.240 +KVM: 0.045 +TCG: 0.042 +register: 0.037 +virtual: 0.031 +PID: 0.028 +socket: 0.021 +VMM: 0.013 +files: 0.013 +device: 0.013 +user-level: 0.012 +architecture: 0.011 +vnc: 0.010 +semantic: 0.007 +network: 0.005 +assembly: 0.005 +risc-v: 0.004 +peripherals: 0.002 +graphic: 0.002 +permissions: 0.001 +alpha: 0.001 +mistranslation: 0.001 +ppc: 0.000 +x86: 0.000 +i386: 0.000 + +[Bug] Take more 150s to boot qemu on ARM64 + +Hi all, +I encounter a issue with kernel 5.19-rc1 on a ARM64 board: it takes +about 150s between beginning to run qemu command and beginng to boot +Linux kernel ("EFI stub: Booting Linux Kernel..."). +But in kernel 5.18-rc4, it only takes about 5s. I git bisect the kernel +code and it finds c2445d387850 ("srcu: Add contention check to +call_srcu() srcu_data ->lock acquisition"). 
+The qemu (qemu version is 6.2.92) command i run is : + +./qemu-system-aarch64 -m 4G,slots=4,maxmem=8g \ +--trace "kvm*" \ +-cpu host \ +-machine virt,accel=kvm,gic-version=3 \ +-machine smp.cpus=2,smp.sockets=2 \ +-no-reboot \ +-nographic \ +-monitor unix:/home/cx/qmp-test,server,nowait \ +-bios /home/cx/boot/QEMU_EFI.fd \ +-kernel /home/cx/boot/Image \ +-device +pcie-root-port,port=0x8,chassis=1,id=net1,bus=pcie.0,multifunction=on,addr=0x1 +\ +-device vfio-pci,host=7d:01.3,id=net0 \ +-device virtio-blk-pci,drive=drive0,id=virtblk0,num-queues=4 \ +-drive file=/home/cx/boot/boot_ubuntu.img,if=none,id=drive0 \ +-append "rdinit=init console=ttyAMA0 root=/dev/vda rootfstype=ext4 rw " \ +-net none \ +-D /home/cx/qemu_log.txt +I am not familiar with rcu code, and don't know how it causes the issue. +Do you have any idea about this issue? +Best Regard, + +Xiang Chen + +On Mon, Jun 13, 2022 at 08:26:34PM +0800, chenxiang (M) wrote: +> +Hi all, +> +> +I encounter a issue with kernel 5.19-rc1 on a ARM64 board: it takes about +> +150s between beginning to run qemu command and beginng to boot Linux kernel +> +("EFI stub: Booting Linux Kernel..."). +> +> +But in kernel 5.18-rc4, it only takes about 5s. I git bisect the kernel code +> +and it finds c2445d387850 ("srcu: Add contention check to call_srcu() +> +srcu_data ->lock acquisition"). 
+> +> +The qemu (qemu version is 6.2.92) command i run is : +> +> +./qemu-system-aarch64 -m 4G,slots=4,maxmem=8g \ +> +--trace "kvm*" \ +> +-cpu host \ +> +-machine virt,accel=kvm,gic-version=3 \ +> +-machine smp.cpus=2,smp.sockets=2 \ +> +-no-reboot \ +> +-nographic \ +> +-monitor unix:/home/cx/qmp-test,server,nowait \ +> +-bios /home/cx/boot/QEMU_EFI.fd \ +> +-kernel /home/cx/boot/Image \ +> +-device +> +pcie-root-port,port=0x8,chassis=1,id=net1,bus=pcie.0,multifunction=on,addr=0x1 +> +\ +> +-device vfio-pci,host=7d:01.3,id=net0 \ +> +-device virtio-blk-pci,drive=drive0,id=virtblk0,num-queues=4 \ +> +-drive file=/home/cx/boot/boot_ubuntu.img,if=none,id=drive0 \ +> +-append "rdinit=init console=ttyAMA0 root=/dev/vda rootfstype=ext4 rw " \ +> +-net none \ +> +-D /home/cx/qemu_log.txt +> +> +I am not familiar with rcu code, and don't know how it causes the issue. Do +> +you have any idea about this issue? +Please see the discussion here: +https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/ +Though that report requires ACPI to be forced on to get the +delay, which results in more than 9,000 back-to-back calls to +synchronize_srcu_expedited(). I cannot reproduce this on my setup, even +with an artificial tight loop invoking synchronize_srcu_expedited(), +but then again I don't have ARM hardware. + +My current guess is that the following patch, but with larger values for +SRCU_MAX_NODELAY_PHASE. Here "larger" might well be up in the hundreds, +or perhaps even larger. + +If you get a chance to experiment with this, could you please reply +to the discussion at the above URL? (Or let me know, and I can CC +you on the next message in that thread.) 
+ + Thanx, Paul + +------------------------------------------------------------------------ + +diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c +index 50ba70f019dea..0db7873f4e95b 100644 +--- a/kernel/rcu/srcutree.c ++++ b/kernel/rcu/srcutree.c +@@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct *ssp) + + #define SRCU_INTERVAL 1 // Base delay if no expedited GPs +pending. + #define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow +readers. +-#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive +no-delay instances. ++#define SRCU_MAX_NODELAY_PHASE 3 // Maximum per-GP-phase consecutive +no-delay instances. + #define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay +instances. + + /* +@@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp) + */ + static unsigned long srcu_get_delay(struct srcu_struct *ssp) + { ++ unsigned long gpstart; ++ unsigned long j; + unsigned long jbase = SRCU_INTERVAL; + + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), +READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + jbase = 0; +- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) +- jbase += jiffies - READ_ONCE(ssp->srcu_gp_start); +- if (!jbase) { +- WRITE_ONCE(ssp->srcu_n_exp_nodelay, +READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); +- if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE) +- jbase = 1; ++ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) { ++ j = jiffies - 1; ++ gpstart = READ_ONCE(ssp->srcu_gp_start); ++ if (time_after(j, gpstart)) ++ jbase += j - gpstart; ++ if (!jbase) { ++ WRITE_ONCE(ssp->srcu_n_exp_nodelay, +READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); ++ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > +SRCU_MAX_NODELAY_PHASE) ++ jbase = 1; ++ } + } + return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase; + } + +å¨ 2022/6/13 21:22, Paul E. 
McKenney åé: +On Mon, Jun 13, 2022 at 08:26:34PM +0800, chenxiang (M) wrote: +Hi all, + +I encounter a issue with kernel 5.19-rc1 on a ARM64 board: it takes about +150s between beginning to run qemu command and beginng to boot Linux kernel +("EFI stub: Booting Linux Kernel..."). + +But in kernel 5.18-rc4, it only takes about 5s. I git bisect the kernel code +and it finds c2445d387850 ("srcu: Add contention check to call_srcu() +srcu_data ->lock acquisition"). + +The qemu (qemu version is 6.2.92) command i run is : + +./qemu-system-aarch64 -m 4G,slots=4,maxmem=8g \ +--trace "kvm*" \ +-cpu host \ +-machine virt,accel=kvm,gic-version=3 \ +-machine smp.cpus=2,smp.sockets=2 \ +-no-reboot \ +-nographic \ +-monitor unix:/home/cx/qmp-test,server,nowait \ +-bios /home/cx/boot/QEMU_EFI.fd \ +-kernel /home/cx/boot/Image \ +-device +pcie-root-port,port=0x8,chassis=1,id=net1,bus=pcie.0,multifunction=on,addr=0x1 +\ +-device vfio-pci,host=7d:01.3,id=net0 \ +-device virtio-blk-pci,drive=drive0,id=virtblk0,num-queues=4 \ +-drive file=/home/cx/boot/boot_ubuntu.img,if=none,id=drive0 \ +-append "rdinit=init console=ttyAMA0 root=/dev/vda rootfstype=ext4 rw " \ +-net none \ +-D /home/cx/qemu_log.txt + +I am not familiar with rcu code, and don't know how it causes the issue. Do +you have any idea about this issue? +Please see the discussion here: +https://lore.kernel.org/all/20615615-0013-5adc-584f-2b1d5c03ebfc@linaro.org/ +Though that report requires ACPI to be forced on to get the +delay, which results in more than 9,000 back-to-back calls to +synchronize_srcu_expedited(). I cannot reproduce this on my setup, even +with an artificial tight loop invoking synchronize_srcu_expedited(), +but then again I don't have ARM hardware. + +My current guess is that the following patch, but with larger values for +SRCU_MAX_NODELAY_PHASE. Here "larger" might well be up in the hundreds, +or perhaps even larger. 
+ +If you get a chance to experiment with this, could you please reply +to the discussion at the above URL? (Or let me know, and I can CC +you on the next message in that thread.) +Ok, thanks, i will reply it on above URL. +Thanx, Paul + +------------------------------------------------------------------------ + +diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c +index 50ba70f019dea..0db7873f4e95b 100644 +--- a/kernel/rcu/srcutree.c ++++ b/kernel/rcu/srcutree.c +@@ -513,7 +513,7 @@ static bool srcu_readers_active(struct srcu_struct *ssp) +#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending. +#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow +readers. +-#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive +no-delay instances. ++#define SRCU_MAX_NODELAY_PHASE 3 // Maximum per-GP-phase consecutive +no-delay instances. + #define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay +instances. +/* +@@ -522,16 +522,22 @@ static bool srcu_readers_active(struct srcu_struct *ssp) + */ + static unsigned long srcu_get_delay(struct srcu_struct *ssp) + { ++ unsigned long gpstart; ++ unsigned long j; + unsigned long jbase = SRCU_INTERVAL; +if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) +jbase = 0; +- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) +- jbase += jiffies - READ_ONCE(ssp->srcu_gp_start); +- if (!jbase) { +- WRITE_ONCE(ssp->srcu_n_exp_nodelay, +READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); +- if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE) +- jbase = 1; ++ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) { ++ j = jiffies - 1; ++ gpstart = READ_ONCE(ssp->srcu_gp_start); ++ if (time_after(j, gpstart)) ++ jbase += j - gpstart; ++ if (!jbase) { ++ WRITE_ONCE(ssp->srcu_n_exp_nodelay, +READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); ++ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > +SRCU_MAX_NODELAY_PHASE) ++ jbase = 1; ++ } + } + return jbase > SRCU_MAX_INTERVAL ? 
SRCU_MAX_INTERVAL : jbase; + } +. + diff --git a/results/classifier/016/debug/96782458 b/results/classifier/016/debug/96782458 new file mode 100644 index 00000000..cae333ff --- /dev/null +++ b/results/classifier/016/debug/96782458 @@ -0,0 +1,1026 @@ +x86: 0.941 +debug: 0.925 +KVM: 0.771 +hypervisor: 0.548 +virtual: 0.313 +operating system: 0.098 +user-level: 0.077 +performance: 0.063 +kernel: 0.061 +register: 0.059 +vnc: 0.048 +files: 0.039 +PID: 0.017 +assembly: 0.014 +device: 0.013 +semantic: 0.013 +VMM: 0.012 +socket: 0.007 +network: 0.005 +TCG: 0.004 +architecture: 0.004 +i386: 0.004 +graphic: 0.002 +risc-v: 0.002 +boot: 0.002 +ppc: 0.001 +permissions: 0.001 +peripherals: 0.001 +alpha: 0.001 +mistranslation: 0.000 +arm: 0.000 + +[Qemu-devel] [BUG] Migrate failes between boards with different PMC counts + +Hi all, + +Recently, I found migration failed when enable vPMU. + +migrate vPMU state was introduced in linux-3.10 + qemu-1.7. + +As long as enable vPMU, qemu will save / load the +vmstate_msr_architectural_pmu(msr_global_ctrl) register during the migration. +But global_ctrl generated based on cpuid(0xA), the number of general-purpose +performance +monitoring counters(PMC) can vary according to Intel SDN. The number of PMC +presented +to vm, does not support configuration currently, it depend on host cpuid, and +enable all pmc +defaultly at KVM. It cause migration to fail between boards with different PMC +counts. + +The return value of cpuid (0xA) is different dur to cpu, according to Intel +SDNï¼18-10 Vol. 3B: + +Note: The number of general-purpose performance monitoring counters (i.e. N in +Figure 18-9) +can vary across processor generations within a processor family, across +processor families, or +could be different depending on the configuration chosen at boot time in the +BIOS regarding +Intel Hyper Threading Technology, (e.g. 
N=2 for 45 nm Intel Atom processors; N +=4 for processors +based on the Nehalem microarchitecture; for processors based on the Sandy Bridge +microarchitecture, N = 4 if Intel Hyper Threading Technology is active and N=8 +if not active). + +Also I found, N=8 if HT is not active based on the broadwellï¼, +such as CPU E7-8890 v4 @ 2.20GHz + +# ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m 4096 -hda +/data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true -incoming +tcp::8888 +Completed 100 % +qemu-system-x86_64: error: failed to set MSR 0x38f to 0x7000000ff +qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +kvm_put_msrs: +Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +Aborted + +So make number of pmc configurable to vm ? Any better idea ? + + +Regards, +-Zhuang Yanying + +* Zhuangyanying (address@hidden) wrote: +> +Hi all, +> +> +Recently, I found migration failed when enable vPMU. +> +> +migrate vPMU state was introduced in linux-3.10 + qemu-1.7. +> +> +As long as enable vPMU, qemu will save / load the +> +vmstate_msr_architectural_pmu(msr_global_ctrl) register during the migration. +> +But global_ctrl generated based on cpuid(0xA), the number of general-purpose +> +performance +> +monitoring counters(PMC) can vary according to Intel SDN. The number of PMC +> +presented +> +to vm, does not support configuration currently, it depend on host cpuid, and +> +enable all pmc +> +defaultly at KVM. It cause migration to fail between boards with different +> +PMC counts. +> +> +The return value of cpuid (0xA) is different dur to cpu, according to Intel +> +SDNï¼18-10 Vol. 3B: +> +> +Note: The number of general-purpose performance monitoring counters (i.e. N +> +in Figure 18-9) +> +can vary across processor generations within a processor family, across +> +processor families, or +> +could be different depending on the configuration chosen at boot time in the +> +BIOS regarding +> +Intel Hyper Threading Technology, (e.g. 
N=2 for 45 nm Intel Atom processors; +> +N =4 for processors +> +based on the Nehalem microarchitecture; for processors based on the Sandy +> +Bridge +> +microarchitecture, N = 4 if Intel Hyper Threading Technology is active and +> +N=8 if not active). +> +> +Also I found, N=8 if HT is not active based on the broadwellï¼, +> +such as CPU E7-8890 v4 @ 2.20GHz +> +> +# ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m 4096 -hda +> +/data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true -incoming +> +tcp::8888 +> +Completed 100 % +> +qemu-system-x86_64: error: failed to set MSR 0x38f to 0x7000000ff +> +qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +> +kvm_put_msrs: +> +Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +> +Aborted +> +> +So make number of pmc configurable to vm ? Any better idea ? +Coincidentally we hit a similar problem a few days ago with -cpu host - it +took me +quite a while to spot the difference between the machines was the source +had hyperthreading disabled. + +An option to set the number of counters makes sense to me; but I wonder +how many other options we need as well. Also, I'm not sure there's any +easy way for libvirt etc to figure out how many counters a host supports - it's +not in /proc/cpuinfo. + +Dave + +> +> +Regards, +> +-Zhuang Yanying +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On Mon, Apr 24, 2017 at 10:23:21AM +0100, Dr. David Alan Gilbert wrote: +> +* Zhuangyanying (address@hidden) wrote: +> +> Hi all, +> +> +> +> Recently, I found migration failed when enable vPMU. +> +> +> +> migrate vPMU state was introduced in linux-3.10 + qemu-1.7. +> +> +> +> As long as enable vPMU, qemu will save / load the +> +> vmstate_msr_architectural_pmu(msr_global_ctrl) register during the +> +> migration. +> +> But global_ctrl generated based on cpuid(0xA), the number of +> +> general-purpose performance +> +> monitoring counters(PMC) can vary according to Intel SDN. 
The number of PMC +> +> presented +> +> to vm, does not support configuration currently, it depend on host cpuid, +> +> and enable all pmc +> +> defaultly at KVM. It cause migration to fail between boards with different +> +> PMC counts. +> +> +> +> The return value of cpuid (0xA) is different dur to cpu, according to Intel +> +> SDNï¼18-10 Vol. 3B: +> +> +> +> Note: The number of general-purpose performance monitoring counters (i.e. N +> +> in Figure 18-9) +> +> can vary across processor generations within a processor family, across +> +> processor families, or +> +> could be different depending on the configuration chosen at boot time in +> +> the BIOS regarding +> +> Intel Hyper Threading Technology, (e.g. N=2 for 45 nm Intel Atom +> +> processors; N =4 for processors +> +> based on the Nehalem microarchitecture; for processors based on the Sandy +> +> Bridge +> +> microarchitecture, N = 4 if Intel Hyper Threading Technology is active and +> +> N=8 if not active). +> +> +> +> Also I found, N=8 if HT is not active based on the broadwellï¼, +> +> such as CPU E7-8890 v4 @ 2.20GHz +> +> +> +> # ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m 4096 -hda +> +> /data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true -incoming +> +> tcp::8888 +> +> Completed 100 % +> +> qemu-system-x86_64: error: failed to set MSR 0x38f to 0x7000000ff +> +> qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +> +> kvm_put_msrs: +> +> Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +> +> Aborted +> +> +> +> So make number of pmc configurable to vm ? Any better idea ? +> +> +Coincidentally we hit a similar problem a few days ago with -cpu host - it +> +took me +> +quite a while to spot the difference between the machines was the source +> +had hyperthreading disabled. +> +> +An option to set the number of counters makes sense to me; but I wonder +> +how many other options we need as well. 
Also, I'm not sure there's any +> +easy way for libvirt etc to figure out how many counters a host supports - +> +it's not in /proc/cpuinfo. +We actually try to avoid /proc/cpuinfo whereever possible. We do direct +CPUID asm instructions to identify features, and prefer to use +/sys/devices/system/cpu if that has suitable data + +Where do the PMC counts come from originally ? CPUID or something else ? + +Regards, +Daniel +-- +|: +https://berrange.com +-o- +https://www.flickr.com/photos/dberrange +:| +|: +https://libvirt.org +-o- +https://fstop138.berrange.com +:| +|: +https://entangle-photo.org +-o- +https://www.instagram.com/dberrange +:| + +* Daniel P. Berrange (address@hidden) wrote: +> +On Mon, Apr 24, 2017 at 10:23:21AM +0100, Dr. David Alan Gilbert wrote: +> +> * Zhuangyanying (address@hidden) wrote: +> +> > Hi all, +> +> > +> +> > Recently, I found migration failed when enable vPMU. +> +> > +> +> > migrate vPMU state was introduced in linux-3.10 + qemu-1.7. +> +> > +> +> > As long as enable vPMU, qemu will save / load the +> +> > vmstate_msr_architectural_pmu(msr_global_ctrl) register during the +> +> > migration. +> +> > But global_ctrl generated based on cpuid(0xA), the number of +> +> > general-purpose performance +> +> > monitoring counters(PMC) can vary according to Intel SDN. The number of +> +> > PMC presented +> +> > to vm, does not support configuration currently, it depend on host cpuid, +> +> > and enable all pmc +> +> > defaultly at KVM. It cause migration to fail between boards with +> +> > different PMC counts. +> +> > +> +> > The return value of cpuid (0xA) is different dur to cpu, according to +> +> > Intel SDNï¼18-10 Vol. 3B: +> +> > +> +> > Note: The number of general-purpose performance monitoring counters (i.e. 
+> +> > N in Figure 18-9) +> +> > can vary across processor generations within a processor family, across +> +> > processor families, or +> +> > could be different depending on the configuration chosen at boot time in +> +> > the BIOS regarding +> +> > Intel Hyper Threading Technology, (e.g. N=2 for 45 nm Intel Atom +> +> > processors; N =4 for processors +> +> > based on the Nehalem microarchitecture; for processors based on the Sandy +> +> > Bridge +> +> > microarchitecture, N = 4 if Intel Hyper Threading Technology is active +> +> > and N=8 if not active). +> +> > +> +> > Also I found, N=8 if HT is not active based on the broadwellï¼, +> +> > such as CPU E7-8890 v4 @ 2.20GHz +> +> > +> +> > # ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m 4096 -hda +> +> > /data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true -incoming +> +> > tcp::8888 +> +> > Completed 100 % +> +> > qemu-system-x86_64: error: failed to set MSR 0x38f to 0x7000000ff +> +> > qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +> +> > kvm_put_msrs: +> +> > Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +> +> > Aborted +> +> > +> +> > So make number of pmc configurable to vm ? Any better idea ? +> +> +> +> Coincidentally we hit a similar problem a few days ago with -cpu host - it +> +> took me +> +> quite a while to spot the difference between the machines was the source +> +> had hyperthreading disabled. +> +> +> +> An option to set the number of counters makes sense to me; but I wonder +> +> how many other options we need as well. Also, I'm not sure there's any +> +> easy way for libvirt etc to figure out how many counters a host supports - +> +> it's not in /proc/cpuinfo. +> +> +We actually try to avoid /proc/cpuinfo whereever possible. We do direct +> +CPUID asm instructions to identify features, and prefer to use +> +/sys/devices/system/cpu if that has suitable data +> +> +Where do the PMC counts come from originally ? CPUID or something else ? 
+Yes, they're bits 8..15 of CPUID leaf 0xa + +Dave + +> +Regards, +> +Daniel +> +-- +> +|: +https://berrange.com +-o- +https://www.flickr.com/photos/dberrange +:| +> +|: +https://libvirt.org +-o- +https://fstop138.berrange.com +:| +> +|: +https://entangle-photo.org +-o- +https://www.instagram.com/dberrange +:| +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + +On Mon, Apr 24, 2017 at 11:27:16AM +0100, Dr. David Alan Gilbert wrote: +> +* Daniel P. Berrange (address@hidden) wrote: +> +> On Mon, Apr 24, 2017 at 10:23:21AM +0100, Dr. David Alan Gilbert wrote: +> +> > * Zhuangyanying (address@hidden) wrote: +> +> > > Hi all, +> +> > > +> +> > > Recently, I found migration failed when enable vPMU. +> +> > > +> +> > > migrate vPMU state was introduced in linux-3.10 + qemu-1.7. +> +> > > +> +> > > As long as enable vPMU, qemu will save / load the +> +> > > vmstate_msr_architectural_pmu(msr_global_ctrl) register during the +> +> > > migration. +> +> > > But global_ctrl generated based on cpuid(0xA), the number of +> +> > > general-purpose performance +> +> > > monitoring counters(PMC) can vary according to Intel SDN. The number of +> +> > > PMC presented +> +> > > to vm, does not support configuration currently, it depend on host +> +> > > cpuid, and enable all pmc +> +> > > defaultly at KVM. It cause migration to fail between boards with +> +> > > different PMC counts. +> +> > > +> +> > > The return value of cpuid (0xA) is different dur to cpu, according to +> +> > > Intel SDNï¼18-10 Vol. 3B: +> +> > > +> +> > > Note: The number of general-purpose performance monitoring counters +> +> > > (i.e. N in Figure 18-9) +> +> > > can vary across processor generations within a processor family, across +> +> > > processor families, or +> +> > > could be different depending on the configuration chosen at boot time +> +> > > in the BIOS regarding +> +> > > Intel Hyper Threading Technology, (e.g. 
N=2 for 45 nm Intel Atom +> +> > > processors; N =4 for processors +> +> > > based on the Nehalem microarchitecture; for processors based on the +> +> > > Sandy Bridge +> +> > > microarchitecture, N = 4 if Intel Hyper Threading Technology is active +> +> > > and N=8 if not active). +> +> > > +> +> > > Also I found, N=8 if HT is not active based on the broadwellï¼, +> +> > > such as CPU E7-8890 v4 @ 2.20GHz +> +> > > +> +> > > # ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m 4096 -hda +> +> > > /data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true +> +> > > -incoming tcp::8888 +> +> > > Completed 100 % +> +> > > qemu-system-x86_64: error: failed to set MSR 0x38f to 0x7000000ff +> +> > > qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +> +> > > kvm_put_msrs: +> +> > > Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +> +> > > Aborted +> +> > > +> +> > > So make number of pmc configurable to vm ? Any better idea ? +> +> > +> +> > Coincidentally we hit a similar problem a few days ago with -cpu host - +> +> > it took me +> +> > quite a while to spot the difference between the machines was the source +> +> > had hyperthreading disabled. +> +> > +> +> > An option to set the number of counters makes sense to me; but I wonder +> +> > how many other options we need as well. Also, I'm not sure there's any +> +> > easy way for libvirt etc to figure out how many counters a host supports - +> +> > it's not in /proc/cpuinfo. +> +> +> +> We actually try to avoid /proc/cpuinfo whereever possible. We do direct +> +> CPUID asm instructions to identify features, and prefer to use +> +> /sys/devices/system/cpu if that has suitable data +> +> +> +> Where do the PMC counts come from originally ? CPUID or something else ? +> +> +Yes, they're bits 8..15 of CPUID leaf 0xa +Ok, that's easy enough for libvirt to detect then. More a question of what +libvirt should then do this with the info.... 
+ +Regards, +Daniel +-- +|: +https://berrange.com +-o- +https://www.flickr.com/photos/dberrange +:| +|: +https://libvirt.org +-o- +https://fstop138.berrange.com +:| +|: +https://entangle-photo.org +-o- +https://www.instagram.com/dberrange +:| + +> +-----Original Message----- +> +From: Daniel P. Berrange [ +mailto:address@hidden +> +Sent: Monday, April 24, 2017 6:34 PM +> +To: Dr. David Alan Gilbert +> +Cc: Zhuangyanying; Zhanghailiang; wangxin (U); address@hidden; +> +Gonglei (Arei); Huangzhichao; address@hidden +> +Subject: Re: [Qemu-devel] [BUG] Migrate failes between boards with different +> +PMC counts +> +> +On Mon, Apr 24, 2017 at 11:27:16AM +0100, Dr. David Alan Gilbert wrote: +> +> * Daniel P. Berrange (address@hidden) wrote: +> +> > On Mon, Apr 24, 2017 at 10:23:21AM +0100, Dr. David Alan Gilbert wrote: +> +> > > * Zhuangyanying (address@hidden) wrote: +> +> > > > Hi all, +> +> > > > +> +> > > > Recently, I found migration failed when enable vPMU. +> +> > > > +> +> > > > migrate vPMU state was introduced in linux-3.10 + qemu-1.7. +> +> > > > +> +> > > > As long as enable vPMU, qemu will save / load the +> +> > > > vmstate_msr_architectural_pmu(msr_global_ctrl) register during the +> +migration. +> +> > > > But global_ctrl generated based on cpuid(0xA), the number of +> +> > > > general-purpose performance monitoring counters(PMC) can vary +> +> > > > according to Intel SDN. The number of PMC presented to vm, does +> +> > > > not support configuration currently, it depend on host cpuid, and +> +> > > > enable +> +all pmc defaultly at KVM. It cause migration to fail between boards with +> +different PMC counts. +> +> > > > +> +> > > > The return value of cpuid (0xA) is different dur to cpu, according to +> +> > > > Intel +> +SDNï¼18-10 Vol. 3B: +> +> > > > +> +> > > > Note: The number of general-purpose performance monitoring +> +> > > > counters (i.e. 
N in Figure 18-9) can vary across processor +> +> > > > generations within a processor family, across processor +> +> > > > families, or could be different depending on the configuration +> +> > > > chosen at boot time in the BIOS regarding Intel Hyper Threading +> +> > > > Technology, (e.g. N=2 for 45 nm Intel Atom processors; N =4 for +> +processors based on the Nehalem microarchitecture; for processors based on +> +the Sandy Bridge microarchitecture, N = 4 if Intel Hyper Threading Technology +> +is active and N=8 if not active). +> +> > > > +> +> > > > Also I found, N=8 if HT is not active based on the broadwellï¼, +> +> > > > such as CPU E7-8890 v4 @ 2.20GHz +> +> > > > +> +> > > > # ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m +> +> > > > 4096 -hda +> +> > > > /data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true +> +> > > > -incoming tcp::8888 Completed 100 % +> +> > > > qemu-system-x86_64: error: failed to set MSR 0x38f to +> +> > > > 0x7000000ff +> +> > > > qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +> +kvm_put_msrs: +> +> > > > Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +> +> > > > Aborted +> +> > > > +> +> > > > So make number of pmc configurable to vm ? Any better idea ? +> +> > > +> +> > > Coincidentally we hit a similar problem a few days ago with -cpu +> +> > > host - it took me quite a while to spot the difference between +> +> > > the machines was the source had hyperthreading disabled. +> +> > > +> +> > > An option to set the number of counters makes sense to me; but I +> +> > > wonder how many other options we need as well. Also, I'm not sure +> +> > > there's any easy way for libvirt etc to figure out how many +> +> > > counters a host supports - it's not in /proc/cpuinfo. +> +> > +> +> > We actually try to avoid /proc/cpuinfo whereever possible. 
We do +> +> > direct CPUID asm instructions to identify features, and prefer to +> +> > use /sys/devices/system/cpu if that has suitable data +> +> > +> +> > Where do the PMC counts come from originally ? CPUID or something +> +else ? +> +> +> +> Yes, they're bits 8..15 of CPUID leaf 0xa +> +> +Ok, that's easy enough for libvirt to detect then. More a question of what +> +libvirt +> +should then do this with the info.... +> +Do you mean to do a validation at the begining of migration? in +qemuMigrationBakeCookie() & qemuMigrationEatCookie(), if the PMC numbers are +not equal, just quit migration? +It maybe a good enough first edition. +But for a further better edition, maybe it's better to support Heterogeneous +migration I think, so we might need to make PMC number configrable, then we +need to modify KVM/qemu as well. + +Regards, +-Zhuang Yanying + +* Zhuangyanying (address@hidden) wrote: +> +> +> +> -----Original Message----- +> +> From: Daniel P. Berrange [ +mailto:address@hidden +> +> Sent: Monday, April 24, 2017 6:34 PM +> +> To: Dr. David Alan Gilbert +> +> Cc: Zhuangyanying; Zhanghailiang; wangxin (U); address@hidden; +> +> Gonglei (Arei); Huangzhichao; address@hidden +> +> Subject: Re: [Qemu-devel] [BUG] Migrate failes between boards with different +> +> PMC counts +> +> +> +> On Mon, Apr 24, 2017 at 11:27:16AM +0100, Dr. David Alan Gilbert wrote: +> +> > * Daniel P. Berrange (address@hidden) wrote: +> +> > > On Mon, Apr 24, 2017 at 10:23:21AM +0100, Dr. David Alan Gilbert wrote: +> +> > > > * Zhuangyanying (address@hidden) wrote: +> +> > > > > Hi all, +> +> > > > > +> +> > > > > Recently, I found migration failed when enable vPMU. +> +> > > > > +> +> > > > > migrate vPMU state was introduced in linux-3.10 + qemu-1.7. +> +> > > > > +> +> > > > > As long as enable vPMU, qemu will save / load the +> +> > > > > vmstate_msr_architectural_pmu(msr_global_ctrl) register during the +> +> migration. 
+> +> > > > > But global_ctrl generated based on cpuid(0xA), the number of +> +> > > > > general-purpose performance monitoring counters(PMC) can vary +> +> > > > > according to Intel SDN. The number of PMC presented to vm, does +> +> > > > > not support configuration currently, it depend on host cpuid, and +> +> > > > > enable +> +> all pmc defaultly at KVM. It cause migration to fail between boards with +> +> different PMC counts. +> +> > > > > +> +> > > > > The return value of cpuid (0xA) is different dur to cpu, according +> +> > > > > to Intel +> +> SDNï¼18-10 Vol. 3B: +> +> > > > > +> +> > > > > Note: The number of general-purpose performance monitoring +> +> > > > > counters (i.e. N in Figure 18-9) can vary across processor +> +> > > > > generations within a processor family, across processor +> +> > > > > families, or could be different depending on the configuration +> +> > > > > chosen at boot time in the BIOS regarding Intel Hyper Threading +> +> > > > > Technology, (e.g. N=2 for 45 nm Intel Atom processors; N =4 for +> +> processors based on the Nehalem microarchitecture; for processors based on +> +> the Sandy Bridge microarchitecture, N = 4 if Intel Hyper Threading +> +> Technology +> +> is active and N=8 if not active). +> +> > > > > +> +> > > > > Also I found, N=8 if HT is not active based on the broadwellï¼, +> +> > > > > such as CPU E7-8890 v4 @ 2.20GHz +> +> > > > > +> +> > > > > # ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm -smp 4 -m +> +> > > > > 4096 -hda +> +> > > > > /data/zyy/test_qemu.img.sles12sp1 -vnc :99 -cpu kvm64,pmu=true +> +> > > > > -incoming tcp::8888 Completed 100 % +> +> > > > > qemu-system-x86_64: error: failed to set MSR 0x38f to +> +> > > > > 0x7000000ff +> +> > > > > qemu-system-x86_64: /data/zyy/git/test/qemu/target/i386/kvm.c:1833: +> +> kvm_put_msrs: +> +> > > > > Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. +> +> > > > > Aborted +> +> > > > > +> +> > > > > So make number of pmc configurable to vm ? 
Any better idea ? +> +> > > > +> +> > > > Coincidentally we hit a similar problem a few days ago with -cpu +> +> > > > host - it took me quite a while to spot the difference between +> +> > > > the machines was the source had hyperthreading disabled. +> +> > > > +> +> > > > An option to set the number of counters makes sense to me; but I +> +> > > > wonder how many other options we need as well. Also, I'm not sure +> +> > > > there's any easy way for libvirt etc to figure out how many +> +> > > > counters a host supports - it's not in /proc/cpuinfo. +> +> > > +> +> > > We actually try to avoid /proc/cpuinfo whereever possible. We do +> +> > > direct CPUID asm instructions to identify features, and prefer to +> +> > > use /sys/devices/system/cpu if that has suitable data +> +> > > +> +> > > Where do the PMC counts come from originally ? CPUID or something +> +> else ? +> +> > +> +> > Yes, they're bits 8..15 of CPUID leaf 0xa +> +> +> +> Ok, that's easy enough for libvirt to detect then. More a question of what +> +> libvirt +> +> should then do this with the info.... +> +> +> +> +Do you mean to do a validation at the begining of migration? in +> +qemuMigrationBakeCookie() & qemuMigrationEatCookie(), if the PMC numbers are +> +not equal, just quit migration? +> +It maybe a good enough first edition. +> +But for a further better edition, maybe it's better to support Heterogeneous +> +migration I think, so we might need to make PMC number configrable, then we +> +need to modify KVM/qemu as well. +Yes agreed; the only thing I wanted to check was that libvirt would have enough +information to be able to use any feature we added to QEMU. + +Dave + +> +Regards, +> +-Zhuang Yanying +-- +Dr. David Alan Gilbert / address@hidden / Manchester, UK + |