diff options
Diffstat (limited to 'classification_output/05/other/21247035')
| -rw-r--r-- | classification_output/05/other/21247035 | 1329 |
1 files changed, 0 insertions, 1329 deletions
diff --git a/classification_output/05/other/21247035 b/classification_output/05/other/21247035 deleted file mode 100644 index 080554a95..000000000 --- a/classification_output/05/other/21247035 +++ /dev/null @@ -1,1329 +0,0 @@ -other: 0.640 -mistranslation: 0.584 -device: 0.525 -KVM: 0.514 -instruction: 0.508 -graphic: 0.426 -assembly: 0.391 -semantic: 0.374 -vnc: 0.367 -boot: 0.345 -network: 0.322 -socket: 0.322 - -[Qemu-devel] [BUG] I/O thread segfault for QEMU on s390x - -Hi, -I have been noticing some segfaults for QEMU on s390x, and I have been -hitting this issue quite reliably (at least once in 10 runs of a test -case). The qemu version is 2.11.50, and I have systemd created coredumps -when this happens. - -Here is a back trace of the segfaulting thread: - - -#0 0x000003ffafed202c in swapcontext () from /lib64/libc.so.6 -#1 0x000002aa355c02ee in qemu_coroutine_new () at -util/coroutine-ucontext.c:164 -#2 0x000002aa355bec34 in qemu_coroutine_create -(address@hidden <blk_aio_read_entry>, -address@hidden) at util/qemu-coroutine.c:76 -#3 0x000002aa35510262 in blk_aio_prwv (blk=0x2aa65fbefa0, -offset=<optimized out>, bytes=<optimized out>, qiov=0x3ffa002a9c0, -address@hidden <blk_aio_read_entry>, flags=0, -cb=0x2aa35340a50 <virtio_blk_rw_complete>, opaque=0x3ffa002a960) at -block/block-backend.c:1299 -#4 0x000002aa35510376 in blk_aio_preadv (blk=<optimized out>, -offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, -cb=<optimized out>, opaque=0x3ffa002a960) at block/block-backend.c:1392 -#5 0x000002aa3534114e in submit_requests (niov=<optimized out>, -num_reqs=<optimized out>, start=<optimized out>, mrb=<optimized out>, -blk=<optimized out>) at -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:372 -#6 virtio_blk_submit_multireq (blk=<optimized out>, -address@hidden) at -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:402 -#7 0x000002aa353422e0 in virtio_blk_handle_vq (s=0x2aa6611e7d8, -vq=0x3ffb0f5f010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 -#8 0x000002aa3536655a in virtio_queue_notify_aio_vq -(address@hidden) at -/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 -#9 0x000002aa35366cd6 in virtio_queue_notify_aio_vq (vq=0x3ffb0f5f010) -at /usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1511 -#10 virtio_queue_host_notifier_aio_poll (opaque=0x3ffb0f5f078) at -/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:2409 -#11 0x000002aa355a8ba4 in run_poll_handlers_once -(address@hidden) at util/aio-posix.c:497 -#12 0x000002aa355a9b74 in run_poll_handlers (max_ns=<optimized out>, -ctx=0x2aa65f99310) at util/aio-posix.c:534 -#13 try_poll_mode (blocking=true, ctx=0x2aa65f99310) at util/aio-posix.c:562 -#14 aio_poll (ctx=0x2aa65f99310, address@hidden) at -util/aio-posix.c:602 -#15 0x000002aa353d2d0a in iothread_run (opaque=0x2aa65f990f0) at -iothread.c:60 -#16 0x000003ffb0f07e82 in start_thread () from /lib64/libpthread.so.0 -#17 0x000003ffaff91596 in thread_start () from /lib64/libc.so.6 -I don't have much knowledge about i/o threads and the block layer code -in QEMU, so I would like to report to the community about this issue. -I believe this very similar to the bug that I reported upstream couple -of days ago -( -https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg04452.html -). -Any help would be greatly appreciated. - -Thanks -Farhan - -On Thu, Mar 1, 2018 at 10:33 PM, Farhan Ali <address@hidden> wrote: -> -Hi, -> -> -I have been noticing some segfaults for QEMU on s390x, and I have been -> -hitting this issue quite reliably (at least once in 10 runs of a test case). -> -The qemu version is 2.11.50, and I have systemd created coredumps -> -when this happens. -Can you describe the test case or suggest how to reproduce it for us? - -Fam - -On 03/02/2018 01:13 AM, Fam Zheng wrote: -On Thu, Mar 1, 2018 at 10:33 PM, Farhan Ali <address@hidden> wrote: -Hi, - -I have been noticing some segfaults for QEMU on s390x, and I have been -hitting this issue quite reliably (at least once in 10 runs of a test case). -The qemu version is 2.11.50, and I have systemd created coredumps -when this happens. -Can you describe the test case or suggest how to reproduce it for us? - -Fam -The test case is with a single guest, running a memory intensive -workload. The guest has 8 vpcus and 4G of memory. -Here is the qemu command line, if that helps: - -/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ --S -object -secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes -\ --machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ --m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ --object iothread,id=iothread1 -object iothread,id=iothread2 -uuid -b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ --display none -no-user-config -nodefaults -chardev -socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait --mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc --no-shutdown \ --boot strict=on -drive -file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native --device -virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 --drive -file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native --device -virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 --netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device -virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 --chardev pty,id=charconsole0 -device -sclpconsole,chardev=charconsole0,id=console0 -device -virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on -Please let me know if I need to provide any other information. - -Thanks -Farhan - -On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: -> -Hi, -> -> -I have been noticing some segfaults for QEMU on s390x, and I have been -> -hitting this issue quite reliably (at least once in 10 runs of a test case). -> -The qemu version is 2.11.50, and I have systemd created coredumps -> -when this happens. -> -> -Here is a back trace of the segfaulting thread: -The backtrace looks normal. - -Please post the QEMU command-line and the details of the segfault (which -memory access faulted?). - -> -#0 0x000003ffafed202c in swapcontext () from /lib64/libc.so.6 -> -#1 0x000002aa355c02ee in qemu_coroutine_new () at -> -util/coroutine-ucontext.c:164 -> -#2 0x000002aa355bec34 in qemu_coroutine_create -> -(address@hidden <blk_aio_read_entry>, -> -address@hidden) at util/qemu-coroutine.c:76 -> -#3 0x000002aa35510262 in blk_aio_prwv (blk=0x2aa65fbefa0, offset=<optimized -> -out>, bytes=<optimized out>, qiov=0x3ffa002a9c0, -> -address@hidden <blk_aio_read_entry>, flags=0, -> -cb=0x2aa35340a50 <virtio_blk_rw_complete>, opaque=0x3ffa002a960) at -> -block/block-backend.c:1299 -> -#4 0x000002aa35510376 in blk_aio_preadv (blk=<optimized out>, -> -offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, -> -cb=<optimized out>, opaque=0x3ffa002a960) at block/block-backend.c:1392 -> -#5 0x000002aa3534114e in submit_requests (niov=<optimized out>, -> -num_reqs=<optimized out>, start=<optimized out>, mrb=<optimized out>, -> -blk=<optimized out>) at -> -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:372 -> -#6 virtio_blk_submit_multireq (blk=<optimized out>, -> -address@hidden) at -> -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:402 -> -#7 0x000002aa353422e0 in virtio_blk_handle_vq (s=0x2aa6611e7d8, -> -vq=0x3ffb0f5f010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 -> -#8 0x000002aa3536655a in virtio_queue_notify_aio_vq -> -(address@hidden) at -> -/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 -> -#9 0x000002aa35366cd6 in virtio_queue_notify_aio_vq (vq=0x3ffb0f5f010) at -> -/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1511 -> -#10 virtio_queue_host_notifier_aio_poll (opaque=0x3ffb0f5f078) at -> -/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:2409 -> -#11 0x000002aa355a8ba4 in run_poll_handlers_once -> -(address@hidden) at util/aio-posix.c:497 -> -#12 0x000002aa355a9b74 in run_poll_handlers (max_ns=<optimized out>, -> -ctx=0x2aa65f99310) at util/aio-posix.c:534 -> -#13 try_poll_mode (blocking=true, ctx=0x2aa65f99310) at util/aio-posix.c:562 -> -#14 aio_poll (ctx=0x2aa65f99310, address@hidden) at -> -util/aio-posix.c:602 -> -#15 0x000002aa353d2d0a in iothread_run (opaque=0x2aa65f990f0) at -> -iothread.c:60 -> -#16 0x000003ffb0f07e82 in start_thread () from /lib64/libpthread.so.0 -> -#17 0x000003ffaff91596 in thread_start () from /lib64/libc.so.6 -> -> -> -I don't have much knowledge about i/o threads and the block layer code in -> -QEMU, so I would like to report to the community about this issue. -> -I believe this very similar to the bug that I reported upstream couple of -> -days ago -> -( -https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg04452.html -). -> -> -Any help would be greatly appreciated. -> -> -Thanks -> -Farhan -> -signature.asc -Description: -PGP signature - -On 03/02/2018 04:23 AM, Stefan Hajnoczi wrote: -On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: -Hi, - -I have been noticing some segfaults for QEMU on s390x, and I have been -hitting this issue quite reliably (at least once in 10 runs of a test case). -The qemu version is 2.11.50, and I have systemd created coredumps -when this happens. - -Here is a back trace of the segfaulting thread: -The backtrace looks normal. - -Please post the QEMU command-line and the details of the segfault (which -memory access faulted?). -I was able to create another crash today and here is the qemu comand line - -/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ --S -object -secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes -\ --machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ --m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ --object iothread,id=iothread1 -object iothread,id=iothread2 -uuid -b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ --display none -no-user-config -nodefaults -chardev -socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait --mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc --no-shutdown \ --boot strict=on -drive -file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native --device -virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 --drive -file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native --device -virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 --netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device -virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 --chardev pty,id=charconsole0 -device -sclpconsole,chardev=charconsole0,id=console0 -device -virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on -This the latest back trace on the segfaulting thread, and it seems to -segfault in swapcontext. -Program terminated with signal SIGSEGV, Segmentation fault. -#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 - - -This is the remaining back trace: - -#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 -#1 0x000002aa33b45566 in qemu_coroutine_new () at -util/coroutine-ucontext.c:164 -#2 0x000002aa33b43eac in qemu_coroutine_create -(address@hidden <blk_aio_write_entry>, -address@hidden) at util/qemu-coroutine.c:76 -#3 0x000002aa33a954da in blk_aio_prwv (blk=0x2aa4f0efda0, -offset=<optimized out>, bytes=<optimized out>, qiov=0x3ff74019080, -address@hidden <blk_aio_write_entry>, flags=0, -cb=0x2aa338c62e8 <virtio_blk_rw_complete>, opaque=0x3ff74019020) at -block/block-backend.c:1299 -#4 0x000002aa33a9563e in blk_aio_pwritev (blk=<optimized out>, -offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, -cb=<optimized out>, opaque=0x3ff74019020) at block/block-backend.c:1400 -#5 0x000002aa338c6a38 in submit_requests (niov=<optimized out>, -num_reqs=1, start=<optimized out>, mrb=0x3ff831fe6e0, blk=<optimized -out>) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:369 -#6 virtio_blk_submit_multireq (blk=<optimized out>, -address@hidden) at -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:426 -#7 0x000002aa338c7b78 in virtio_blk_handle_vq (s=0x2aa4f2507c8, -vq=0x3ff869df010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 -#8 0x000002aa338ebdf2 in virtio_queue_notify_aio_vq (vq=0x3ff869df010) -at /usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 -#9 0x000002aa33b2df46 in aio_dispatch_handlers -(address@hidden) at util/aio-posix.c:406 -#10 0x000002aa33b2eb50 in aio_poll (ctx=0x2aa4f0ca050, -address@hidden) at util/aio-posix.c:692 -#11 0x000002aa33957f6a in iothread_run (opaque=0x2aa4f0c9630) at -iothread.c:60 -#12 0x000003ff86987e82 in start_thread () from /lib64/libpthread.so.0 -#13 0x000003ff85a11596 in thread_start () from /lib64/libc.so.6 -Backtrace stopped: previous frame identical to this frame (corrupt stack?) - -On Fri, Mar 02, 2018 at 10:30:57AM -0500, Farhan Ali wrote: -> -> -> -On 03/02/2018 04:23 AM, Stefan Hajnoczi wrote: -> -> On Thu, Mar 01, 2018 at 09:33:35AM -0500, Farhan Ali wrote: -> -> > Hi, -> -> > -> -> > I have been noticing some segfaults for QEMU on s390x, and I have been -> -> > hitting this issue quite reliably (at least once in 10 runs of a test -> -> > case). -> -> > The qemu version is 2.11.50, and I have systemd created coredumps -> -> > when this happens. -> -> > -> -> > Here is a back trace of the segfaulting thread: -> -> The backtrace looks normal. -> -> -> -> Please post the QEMU command-line and the details of the segfault (which -> -> memory access faulted?). -> -> -> -> -> -I was able to create another crash today and here is the qemu comand line -> -> -/usr/bin/qemu-kvm -name guest=sles,debug-threads=on \ -> --S -object -> -secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-2-sles/master-key.aes -> -\ -> --machine s390-ccw-virtio-2.12,accel=kvm,usb=off,dump-guest-core=off \ -> --m 4096 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 \ -> --object iothread,id=iothread1 -object iothread,id=iothread2 -uuid -> -b83a596b-3a1a-4ac9-9f3e-d9a4032ee52c \ -> --display none -no-user-config -nodefaults -chardev -> -socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-2-sles/monitor.sock,server,nowait -> -> --mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -> -\ -> --boot strict=on -drive -> -file=/dev/mapper/360050763998b0883980000002400002b,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native -> --device -> -virtio-blk-ccw,iothread=iothread1,scsi=off,devno=fe.0.0001,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 -> --drive -> -file=/dev/mapper/360050763998b0883980000002800002f,format=raw,if=none,id=drive-virtio-disk1,cache=none,aio=native -> --device -> -virtio-blk-ccw,iothread=iothread2,scsi=off,devno=fe.0.0002,drive=drive-virtio-disk1,id=virtio-disk1 -> --netdev tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device -> -virtio-net-ccw,netdev=hostnet0,id=net0,mac=02:38:a6:36:e8:1f,devno=fe.0.0000 -> --chardev pty,id=charconsole0 -device -> -sclpconsole,chardev=charconsole0,id=console0 -device -> -virtio-balloon-ccw,id=balloon0,devno=fe.3.ffba -msg timestamp=on -> -> -> -This the latest back trace on the segfaulting thread, and it seems to -> -segfault in swapcontext. -> -> -Program terminated with signal SIGSEGV, Segmentation fault. -> -#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 -Please include the following gdb output: - - (gdb) disas swapcontext - (gdb) i r - -That way it's possible to see which instruction faulted and which -registers were being accessed. - -> -This is the remaining back trace: -> -> -#0 0x000003ff8595202c in swapcontext () from /lib64/libc.so.6 -> -#1 0x000002aa33b45566 in qemu_coroutine_new () at -> -util/coroutine-ucontext.c:164 -> -#2 0x000002aa33b43eac in qemu_coroutine_create -> -(address@hidden <blk_aio_write_entry>, -> -address@hidden) at util/qemu-coroutine.c:76 -> -#3 0x000002aa33a954da in blk_aio_prwv (blk=0x2aa4f0efda0, offset=<optimized -> -out>, bytes=<optimized out>, qiov=0x3ff74019080, -> -address@hidden <blk_aio_write_entry>, flags=0, -> -cb=0x2aa338c62e8 <virtio_blk_rw_complete>, opaque=0x3ff74019020) at -> -block/block-backend.c:1299 -> -#4 0x000002aa33a9563e in blk_aio_pwritev (blk=<optimized out>, -> -offset=<optimized out>, qiov=<optimized out>, flags=<optimized out>, -> -cb=<optimized out>, opaque=0x3ff74019020) at block/block-backend.c:1400 -> -#5 0x000002aa338c6a38 in submit_requests (niov=<optimized out>, num_reqs=1, -> -start=<optimized out>, mrb=0x3ff831fe6e0, blk=<optimized out>) at -> -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:369 -> -#6 virtio_blk_submit_multireq (blk=<optimized out>, -> -address@hidden) at -> -/usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:426 -> -#7 0x000002aa338c7b78 in virtio_blk_handle_vq (s=0x2aa4f2507c8, -> -vq=0x3ff869df010) at /usr/src/debug/qemu-2.11.50/hw/block/virtio-blk.c:620 -> -#8 0x000002aa338ebdf2 in virtio_queue_notify_aio_vq (vq=0x3ff869df010) at -> -/usr/src/debug/qemu-2.11.50/hw/virtio/virtio.c:1515 -> -#9 0x000002aa33b2df46 in aio_dispatch_handlers -> -(address@hidden) at util/aio-posix.c:406 -> -#10 0x000002aa33b2eb50 in aio_poll (ctx=0x2aa4f0ca050, -> -address@hidden) at util/aio-posix.c:692 -> -#11 0x000002aa33957f6a in iothread_run (opaque=0x2aa4f0c9630) at -> -iothread.c:60 -> -#12 0x000003ff86987e82 in start_thread () from /lib64/libpthread.so.0 -> -#13 0x000003ff85a11596 in thread_start () from /lib64/libc.so.6 -> -Backtrace stopped: previous frame identical to this frame (corrupt stack?) -> -signature.asc -Description: -PGP signature - -On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: -Please include the following gdb output: - - (gdb) disas swapcontext - (gdb) i r - -That way it's possible to see which instruction faulted and which -registers were being accessed. -here is the disas out for swapcontext, this is on a coredump with -debugging symbols enabled for qemu. So the addresses from the previous -dump is a little different. -(gdb) disas swapcontext -Dump of assembler code for function swapcontext: - 0x000003ff90751fb8 <+0>: lgr %r1,%r2 - 0x000003ff90751fbc <+4>: lgr %r0,%r3 - 0x000003ff90751fc0 <+8>: stfpc 248(%r1) - 0x000003ff90751fc4 <+12>: std %f0,256(%r1) - 0x000003ff90751fc8 <+16>: std %f1,264(%r1) - 0x000003ff90751fcc <+20>: std %f2,272(%r1) - 0x000003ff90751fd0 <+24>: std %f3,280(%r1) - 0x000003ff90751fd4 <+28>: std %f4,288(%r1) - 0x000003ff90751fd8 <+32>: std %f5,296(%r1) - 0x000003ff90751fdc <+36>: std %f6,304(%r1) - 0x000003ff90751fe0 <+40>: std %f7,312(%r1) - 0x000003ff90751fe4 <+44>: std %f8,320(%r1) - 0x000003ff90751fe8 <+48>: std %f9,328(%r1) - 0x000003ff90751fec <+52>: std %f10,336(%r1) - 0x000003ff90751ff0 <+56>: std %f11,344(%r1) - 0x000003ff90751ff4 <+60>: std %f12,352(%r1) - 0x000003ff90751ff8 <+64>: std %f13,360(%r1) - 0x000003ff90751ffc <+68>: std %f14,368(%r1) - 0x000003ff90752000 <+72>: std %f15,376(%r1) - 0x000003ff90752004 <+76>: slgr %r2,%r2 - 0x000003ff90752008 <+80>: stam %a0,%a15,184(%r1) - 0x000003ff9075200c <+84>: stmg %r0,%r15,56(%r1) - 0x000003ff90752012 <+90>: la %r2,2 - 0x000003ff90752016 <+94>: lgr %r5,%r0 - 0x000003ff9075201a <+98>: la %r3,384(%r5) - 0x000003ff9075201e <+102>: la %r4,384(%r1) - 0x000003ff90752022 <+106>: lghi %r5,8 - 0x000003ff90752026 <+110>: svc 175 - 0x000003ff90752028 <+112>: lgr %r5,%r0 -=> 0x000003ff9075202c <+116>: lfpc 248(%r5) - 0x000003ff90752030 <+120>: ld %f0,256(%r5) - 0x000003ff90752034 <+124>: ld %f1,264(%r5) - 0x000003ff90752038 <+128>: ld %f2,272(%r5) - 0x000003ff9075203c <+132>: ld %f3,280(%r5) - 0x000003ff90752040 <+136>: ld %f4,288(%r5) - 0x000003ff90752044 <+140>: ld %f5,296(%r5) - 0x000003ff90752048 <+144>: ld %f6,304(%r5) - 0x000003ff9075204c <+148>: ld %f7,312(%r5) - 0x000003ff90752050 <+152>: ld %f8,320(%r5) - 0x000003ff90752054 <+156>: ld %f9,328(%r5) - 0x000003ff90752058 <+160>: ld %f10,336(%r5) - 0x000003ff9075205c <+164>: ld %f11,344(%r5) - 0x000003ff90752060 <+168>: ld %f12,352(%r5) - 0x000003ff90752064 <+172>: ld %f13,360(%r5) - 0x000003ff90752068 <+176>: ld %f14,368(%r5) - 0x000003ff9075206c <+180>: ld %f15,376(%r5) - 0x000003ff90752070 <+184>: lam %a2,%a15,192(%r5) - 0x000003ff90752074 <+188>: lmg %r0,%r15,56(%r5) - 0x000003ff9075207a <+194>: br %r14 -End of assembler dump. - -(gdb) i r -r0 0x0 0 -r1 0x3ff8fe7de40 4396165881408 -r2 0x0 0 -r3 0x3ff8fe7e1c0 4396165882304 -r4 0x3ff8fe7dfc0 4396165881792 -r5 0x0 0 -r6 0xffffffff88004880 18446744071696304256 -r7 0x3ff880009e0 4396033247712 -r8 0x27ff89000 10736930816 -r9 0x3ff88001460 4396033250400 -r10 0x1000 4096 -r11 0x1261be0 19274720 -r12 0x3ff88001e00 4396033252864 -r13 0x14d0bc0 21826496 -r14 0x1312ac8 19999432 -r15 0x3ff8fe7dc80 4396165880960 -pc 0x3ff9075202c 0x3ff9075202c <swapcontext+116> -cc 0x2 2 - -On 03/05/2018 07:45 PM, Farhan Ali wrote: -> -> -> -On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: -> -> Please include the following gdb output: -> -> -> ->   (gdb) disas swapcontext -> ->   (gdb) i r -> -> -> -> That way it's possible to see which instruction faulted and which -> -> registers were being accessed. -> -> -> -here is the disas out for swapcontext, this is on a coredump with debugging -> -symbols enabled for qemu. So the addresses from the previous dump is a little -> -different. -> -> -> -(gdb) disas swapcontext -> -Dump of assembler code for function swapcontext: -> -  0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 -> -  0x000003ff90751fbc <+4>:   lgr   %r0,%r3 -> -  0x000003ff90751fc0 <+8>:   stfpc   248(%r1) -> -  0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) -> -  0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) -> -  0x000003ff90751fcc <+20>:   std   %f2,272(%r1) -> -  0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) -> -  0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) -> -  0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) -> -  0x000003ff90751fdc <+36>:   std   %f6,304(%r1) -> -  0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) -> -  0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) -> -  0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) -> -  0x000003ff90751fec <+52>:   std   %f10,336(%r1) -> -  0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) -> -  0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) -> -  0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) -> -  0x000003ff90751ffc <+68>:   std   %f14,368(%r1) -> -  0x000003ff90752000 <+72>:   std   %f15,376(%r1) -> -  0x000003ff90752004 <+76>:   slgr   %r2,%r2 -> -  0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) -> -  0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) -> -  0x000003ff90752012 <+90>:   la   %r2,2 -> -  0x000003ff90752016 <+94>:   lgr   %r5,%r0 -> -  0x000003ff9075201a <+98>:   la   %r3,384(%r5) -> -  0x000003ff9075201e <+102>:   la   %r4,384(%r1) -> -  0x000003ff90752022 <+106>:   lghi   %r5,8 -> -  0x000003ff90752026 <+110>:   svc   175 -sys_rt_sigprocmask. r0 should not be changed by the system call. - -> -  0x000003ff90752028 <+112>:   lgr   %r5,%r0 -> -=> 0x000003ff9075202c <+116>:   lfpc   248(%r5) -so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the -2nd parameter to this -function). Now this is odd. - -> -  0x000003ff90752030 <+120>:   ld   %f0,256(%r5) -> -  0x000003ff90752034 <+124>:   ld   %f1,264(%r5) -> -  0x000003ff90752038 <+128>:   ld   %f2,272(%r5) -> -  0x000003ff9075203c <+132>:   ld   %f3,280(%r5) -> -  0x000003ff90752040 <+136>:   ld   %f4,288(%r5) -> -  0x000003ff90752044 <+140>:   ld   %f5,296(%r5) -> -  0x000003ff90752048 <+144>:   ld   %f6,304(%r5) -> -  0x000003ff9075204c <+148>:   ld   %f7,312(%r5) -> -  0x000003ff90752050 <+152>:   ld   %f8,320(%r5) -> -  0x000003ff90752054 <+156>:   ld   %f9,328(%r5) -> -  0x000003ff90752058 <+160>:   ld   %f10,336(%r5) -> -  0x000003ff9075205c <+164>:   ld   %f11,344(%r5) -> -  0x000003ff90752060 <+168>:   ld   %f12,352(%r5) -> -  0x000003ff90752064 <+172>:   ld   %f13,360(%r5) -> -  0x000003ff90752068 <+176>:   ld   %f14,368(%r5) -> -  0x000003ff9075206c <+180>:   ld   %f15,376(%r5) -> -  0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) -> -  0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) -> -  0x000003ff9075207a <+194>:   br   %r14 -> -End of assembler dump. -> -> -(gdb) i r -> -r0            0x0   0 -> -r1            0x3ff8fe7de40   4396165881408 -> -r2            0x0   0 -> -r3            0x3ff8fe7e1c0   4396165882304 -> -r4            0x3ff8fe7dfc0   4396165881792 -> -r5            0x0   0 -> -r6            0xffffffff88004880   18446744071696304256 -> -r7            0x3ff880009e0   4396033247712 -> -r8            0x27ff89000   10736930816 -> -r9            0x3ff88001460   4396033250400 -> -r10           0x1000   4096 -> -r11           0x1261be0   19274720 -> -r12           0x3ff88001e00   4396033252864 -> -r13           0x14d0bc0   21826496 -> -r14           0x1312ac8   19999432 -> -r15           0x3ff8fe7dc80   4396165880960 -> -pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> -> -cc            0x2   2 - -On 5 March 2018 at 18:54, Christian Borntraeger <address@hidden> wrote: -> -> -> -On 03/05/2018 07:45 PM, Farhan Ali wrote: -> -> 0x000003ff90752026 <+110>: svc 175 -> -> -sys_rt_sigprocmask. r0 should not be changed by the system call. -> -> -> 0x000003ff90752028 <+112>: lgr %r5,%r0 -> -> => 0x000003ff9075202c <+116>: lfpc 248(%r5) -> -> -so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the -> -2nd parameter to this -> -function). Now this is odd. -...particularly given that the only place we call swapcontext() -the second parameter is always the address of a local variable -and can't be 0... - -thanks --- PMM - -Do you happen to run with a recent host kernel that has - -commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 - s390: scrub registers on kernel entry and KVM exit - - - - - -Can you run with this on top -diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S -index 13a133a6015c..d6dc0e5e8f74 100644 ---- a/arch/s390/kernel/entry.S -+++ b/arch/s390/kernel/entry.S -@@ -426,13 +426,13 @@ ENTRY(system_call) - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - stmg %r0,%r7,__PT_R0(%r11) -- # clear user controlled register to prevent speculative use -- xgr %r0,%r0 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC - stg %r14,__PT_FLAGS(%r11) - .Lsysc_do_svc: -+ # clear user controlled register to prevent speculative use -+ xgr %r0,%r0 - # load address of system call table - lg %r10,__THREAD_sysc_table(%r13,%r12) - llgh %r8,__PT_INT_CODE+2(%r11) - - -To me it looks like that the critical section cleanup (interrupt during system -call entry) might -save the registers again into ptregs but we have already zeroed out r0. -This patch moves the clearing of r0 after sysc_do_svc, which should fix the -critical -section cleanup. - -Adding Martin and Heiko. Will spin a patch. - - -On 03/05/2018 07:54 PM, Christian Borntraeger wrote: -> -> -> -On 03/05/2018 07:45 PM, Farhan Ali wrote: -> -> -> -> -> -> On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: -> ->> Please include the following gdb output: -> ->> -> ->>   (gdb) disas swapcontext -> ->>   (gdb) i r -> ->> -> ->> That way it's possible to see which instruction faulted and which -> ->> registers were being accessed. -> -> -> -> -> -> here is the disas out for swapcontext, this is on a coredump with debugging -> -> symbols enabled for qemu. So the addresses from the previous dump is a -> -> little different. -> -> -> -> -> -> (gdb) disas swapcontext -> -> Dump of assembler code for function swapcontext: -> ->   0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 -> ->   0x000003ff90751fbc <+4>:   lgr   %r0,%r3 -> ->   0x000003ff90751fc0 <+8>:   stfpc   248(%r1) -> ->   0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) -> ->   0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) -> ->   0x000003ff90751fcc <+20>:   std   %f2,272(%r1) -> ->   0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) -> ->   0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) -> ->   0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) -> ->   0x000003ff90751fdc <+36>:   std   %f6,304(%r1) -> ->   0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) -> ->   0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) -> ->   0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) -> ->   0x000003ff90751fec <+52>:   std   %f10,336(%r1) -> ->   0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) -> ->   0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) -> ->   0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) -> ->   0x000003ff90751ffc <+68>:   std   %f14,368(%r1) -> ->   0x000003ff90752000 <+72>:   std   %f15,376(%r1) -> ->   0x000003ff90752004 <+76>:   slgr   %r2,%r2 -> ->   0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) -> ->   0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) -> ->   0x000003ff90752012 <+90>:   la   %r2,2 -> ->   0x000003ff90752016 <+94>:   lgr   %r5,%r0 -> ->   0x000003ff9075201a <+98>:   la   %r3,384(%r5) -> ->   0x000003ff9075201e <+102>:   la   %r4,384(%r1) -> ->   0x000003ff90752022 <+106>:   lghi   %r5,8 -> ->   0x000003ff90752026 <+110>:   svc   175 -> -> -sys_rt_sigprocmask. r0 should not be changed by the system call. -> -> ->   0x000003ff90752028 <+112>:   lgr   %r5,%r0 -> -> => 0x000003ff9075202c <+116>:   lfpc   248(%r5) -> -> -so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the -> -2nd parameter to this -> -function). Now this is odd. -> -> ->   0x000003ff90752030 <+120>:   ld   %f0,256(%r5) -> ->   0x000003ff90752034 <+124>:   ld   %f1,264(%r5) -> ->   0x000003ff90752038 <+128>:   ld   %f2,272(%r5) -> ->   0x000003ff9075203c <+132>:   ld   %f3,280(%r5) -> ->   0x000003ff90752040 <+136>:   ld   %f4,288(%r5) -> ->   0x000003ff90752044 <+140>:   ld   %f5,296(%r5) -> ->   0x000003ff90752048 <+144>:   ld   %f6,304(%r5) -> ->   0x000003ff9075204c <+148>:   ld   %f7,312(%r5) -> ->   0x000003ff90752050 <+152>:   ld   %f8,320(%r5) -> ->   0x000003ff90752054 <+156>:   ld   %f9,328(%r5) -> ->   0x000003ff90752058 <+160>:   ld   %f10,336(%r5) -> ->   0x000003ff9075205c <+164>:   ld   %f11,344(%r5) -> ->   0x000003ff90752060 <+168>:   ld   %f12,352(%r5) -> ->   0x000003ff90752064 <+172>:   ld   %f13,360(%r5) -> ->   0x000003ff90752068 <+176>:   ld   %f14,368(%r5) -> ->   0x000003ff9075206c <+180>:   ld   %f15,376(%r5) -> ->   0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) -> ->   0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) -> ->   0x000003ff9075207a <+194>:   br   %r14 -> -> End of assembler dump. -> -> -> -> (gdb) i r -> -> r0            0x0   0 -> -> r1            0x3ff8fe7de40   4396165881408 -> -> r2            0x0   0 -> -> r3            0x3ff8fe7e1c0   4396165882304 -> -> r4            0x3ff8fe7dfc0   4396165881792 -> -> r5            0x0   0 -> -> r6            0xffffffff88004880   18446744071696304256 -> -> r7            0x3ff880009e0   4396033247712 -> -> r8            0x27ff89000   10736930816 -> -> r9            0x3ff88001460   4396033250400 -> -> r10           0x1000   4096 -> -> r11           0x1261be0   19274720 -> -> r12           0x3ff88001e00   4396033252864 -> -> r13           0x14d0bc0   21826496 -> -> r14           0x1312ac8   19999432 -> -> r15           0x3ff8fe7dc80   4396165880960 -> -> pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> -> -> cc            0x2   2 - -On 03/05/2018 02:08 PM, Christian Borntraeger wrote: -Do you happen to run with a recent host kernel that has - -commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 - s390: scrub registers on kernel entry and KVM exit -Yes. -Can you run with this on top -diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S -index 13a133a6015c..d6dc0e5e8f74 100644 ---- a/arch/s390/kernel/entry.S -+++ b/arch/s390/kernel/entry.S -@@ -426,13 +426,13 @@ ENTRY(system_call) - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - stmg %r0,%r7,__PT_R0(%r11) -- # clear user controlled register to prevent speculative use -- xgr %r0,%r0 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC - stg %r14,__PT_FLAGS(%r11) - .Lsysc_do_svc: -+ # clear user controlled register to prevent speculative use -+ xgr %r0,%r0 - # load address of system call table - lg %r10,__THREAD_sysc_table(%r13,%r12) - llgh %r8,__PT_INT_CODE+2(%r11) - - -To me it looks like that the critical section cleanup (interrupt during system -call entry) might -save the registers again into ptregs but we have already zeroed out r0. -This patch moves the clearing of r0 after sysc_do_svc, which should fix the -critical -section cleanup. -Okay I will run with this. -Adding Martin and Heiko. Will spin a patch. - - -On 03/05/2018 07:54 PM, Christian Borntraeger wrote: -On 03/05/2018 07:45 PM, Farhan Ali wrote: -On 03/05/2018 06:03 AM, Stefan Hajnoczi wrote: -Please include the following gdb output: - -   (gdb) disas swapcontext -   (gdb) i r - -That way it's possible to see which instruction faulted and which -registers were being accessed. -here is the disas out for swapcontext, this is on a coredump with debugging -symbols enabled for qemu. So the addresses from the previous dump is a little -different. - - -(gdb) disas swapcontext -Dump of assembler code for function swapcontext: -   0x000003ff90751fb8 <+0>:   lgr   %r1,%r2 -   0x000003ff90751fbc <+4>:   lgr   %r0,%r3 -   0x000003ff90751fc0 <+8>:   stfpc   248(%r1) -   0x000003ff90751fc4 <+12>:   std   %f0,256(%r1) -   0x000003ff90751fc8 <+16>:   std   %f1,264(%r1) -   0x000003ff90751fcc <+20>:   std   %f2,272(%r1) -   0x000003ff90751fd0 <+24>:   std   %f3,280(%r1) -   0x000003ff90751fd4 <+28>:   std   %f4,288(%r1) -   0x000003ff90751fd8 <+32>:   std   %f5,296(%r1) -   0x000003ff90751fdc <+36>:   std   %f6,304(%r1) -   0x000003ff90751fe0 <+40>:   std   %f7,312(%r1) -   0x000003ff90751fe4 <+44>:   std   %f8,320(%r1) -   0x000003ff90751fe8 <+48>:   std   %f9,328(%r1) -   0x000003ff90751fec <+52>:   std   %f10,336(%r1) -   0x000003ff90751ff0 <+56>:   std   %f11,344(%r1) -   0x000003ff90751ff4 <+60>:   std   %f12,352(%r1) -   0x000003ff90751ff8 <+64>:   std   %f13,360(%r1) -   0x000003ff90751ffc <+68>:   std   %f14,368(%r1) -   0x000003ff90752000 <+72>:   std   %f15,376(%r1) -   0x000003ff90752004 <+76>:   slgr   %r2,%r2 -   0x000003ff90752008 <+80>:   stam   %a0,%a15,184(%r1) -   0x000003ff9075200c <+84>:   stmg   %r0,%r15,56(%r1) -   0x000003ff90752012 <+90>:   la   %r2,2 -   0x000003ff90752016 <+94>:   lgr   %r5,%r0 -   0x000003ff9075201a <+98>:   la   %r3,384(%r5) -   0x000003ff9075201e <+102>:   la   %r4,384(%r1) -   0x000003ff90752022 <+106>:   lghi   %r5,8 -   0x000003ff90752026 <+110>:   svc   175 -sys_rt_sigprocmask. r0 should not be changed by the system call. -  0x000003ff90752028 <+112>:   lgr   %r5,%r0 -=> 0x000003ff9075202c <+116>:   lfpc   248(%r5) -so r5 is zero and it was loaded from r0. r0 was loaded from r3 (which is the -2nd parameter to this -function). Now this is odd. -  0x000003ff90752030 <+120>:   ld   %f0,256(%r5) -   0x000003ff90752034 <+124>:   ld   %f1,264(%r5) -   0x000003ff90752038 <+128>:   ld   %f2,272(%r5) -   0x000003ff9075203c <+132>:   ld   %f3,280(%r5) -   0x000003ff90752040 <+136>:   ld   %f4,288(%r5) -   0x000003ff90752044 <+140>:   ld   %f5,296(%r5) -   0x000003ff90752048 <+144>:   ld   %f6,304(%r5) -   0x000003ff9075204c <+148>:   ld   %f7,312(%r5) -   0x000003ff90752050 <+152>:   ld   %f8,320(%r5) -   0x000003ff90752054 <+156>:   ld   %f9,328(%r5) -   0x000003ff90752058 <+160>:   ld   %f10,336(%r5) -   0x000003ff9075205c <+164>:   ld   %f11,344(%r5) -   0x000003ff90752060 <+168>:   ld   %f12,352(%r5) -   0x000003ff90752064 <+172>:   ld   %f13,360(%r5) -   0x000003ff90752068 <+176>:   ld   %f14,368(%r5) -   0x000003ff9075206c <+180>:   ld   %f15,376(%r5) -   0x000003ff90752070 <+184>:   lam   %a2,%a15,192(%r5) -   0x000003ff90752074 <+188>:   lmg   %r0,%r15,56(%r5) -   0x000003ff9075207a <+194>:   br   %r14 -End of assembler dump. - -(gdb) i r -r0            0x0   0 -r1            0x3ff8fe7de40   4396165881408 -r2            0x0   0 -r3            0x3ff8fe7e1c0   4396165882304 -r4            0x3ff8fe7dfc0   4396165881792 -r5            0x0   0 -r6            0xffffffff88004880   18446744071696304256 -r7            0x3ff880009e0   4396033247712 -r8            0x27ff89000   10736930816 -r9            0x3ff88001460   4396033250400 -r10           0x1000   4096 -r11           0x1261be0   19274720 -r12           0x3ff88001e00   4396033252864 -r13           0x14d0bc0   21826496 -r14           0x1312ac8   19999432 -r15           0x3ff8fe7dc80   4396165880960 -pc            0x3ff9075202c   0x3ff9075202c <swapcontext+116> -cc            0x2   2 - -On Mon, 5 Mar 2018 20:08:45 +0100 -Christian Borntraeger <address@hidden> wrote: - -> -Do you happen to run with a recent host kernel that has -> -> -commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 -> -s390: scrub registers on kernel entry and KVM exit -> -> -Can you run with this on top -> -diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S -> -index 13a133a6015c..d6dc0e5e8f74 100644 -> ---- a/arch/s390/kernel/entry.S -> -+++ b/arch/s390/kernel/entry.S -> -@@ -426,13 +426,13 @@ ENTRY(system_call) -> -UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER -> -BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -> -stmg %r0,%r7,__PT_R0(%r11) -> -- # clear user controlled register to prevent speculative use -> -- xgr %r0,%r0 -> -mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC -> -mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW -> -mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC -> -stg %r14,__PT_FLAGS(%r11) -> -.Lsysc_do_svc: -> -+ # clear user controlled register to prevent speculative use -> -+ xgr %r0,%r0 -> -# load address of system call table -> -lg %r10,__THREAD_sysc_table(%r13,%r12) -> -llgh %r8,__PT_INT_CODE+2(%r11) -> -> -> -To me it looks like that the critical section cleanup (interrupt during -> -system call entry) might -> -save the registers again into ptregs but we have already zeroed out r0. -> -This patch moves the clearing of r0 after sysc_do_svc, which should fix the -> -critical -> -section cleanup. -> -> -Adding Martin and Heiko. Will spin a patch. -Argh, yes. Thanks Chrisitan, this is it. I have been searching for the bug -for days now. The point is that if the system call handler is interrupted -after the xgr but before .Lsysc_do_svc the code at .Lcleanup_system_call -repeats the stmg for %r0-%r7 but now %r0 is already zero. - -Please commit a patch for this and I'll will queue it up immediately. - --- -blue skies, - Martin. - -"Reality continues to ruin my life." - Calvin. - -On 03/06/2018 01:34 AM, Martin Schwidefsky wrote: -On Mon, 5 Mar 2018 20:08:45 +0100 -Christian Borntraeger <address@hidden> wrote: -Do you happen to run with a recent host kernel that has - -commit 7041d28115e91f2144f811ffe8a195c696b1e1d0 - s390: scrub registers on kernel entry and KVM exit - -Can you run with this on top -diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S -index 13a133a6015c..d6dc0e5e8f74 100644 ---- a/arch/s390/kernel/entry.S -+++ b/arch/s390/kernel/entry.S -@@ -426,13 +426,13 @@ ENTRY(system_call) - UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - stmg %r0,%r7,__PT_R0(%r11) -- # clear user controlled register to prevent speculative use -- xgr %r0,%r0 - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC - stg %r14,__PT_FLAGS(%r11) - .Lsysc_do_svc: -+ # clear user controlled register to prevent speculative use -+ xgr %r0,%r0 - # load address of system call table - lg %r10,__THREAD_sysc_table(%r13,%r12) - llgh %r8,__PT_INT_CODE+2(%r11) - - -To me it looks like that the critical section cleanup (interrupt during system -call entry) might -save the registers again into ptregs but we have already zeroed out r0. -This patch moves the clearing of r0 after sysc_do_svc, which should fix the -critical -section cleanup. - -Adding Martin and Heiko. Will spin a patch. -Argh, yes. Thanks Chrisitan, this is it. I have been searching for the bug -for days now. The point is that if the system call handler is interrupted -after the xgr but before .Lsysc_do_svc the code at .Lcleanup_system_call -repeats the stmg for %r0-%r7 but now %r0 is already zero. - -Please commit a patch for this and I'll will queue it up immediately. -This patch does fix the QEMU crash. I haven't seen the crash after -running the test case for more than a day. Thanks to everyone for taking -a look at this problem :) -Thanks -Farhan - |