1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
QEMU causes host hang / reset on PPC64EL
QEMU causes a host hang / reset on PPC64EL when used in KVM + HV mode (kvm_hv module).
After a random amount of uptime, starting new QEMU virtual machines will cause the host to experience a soft CPU lockup. Depending on configuration and other random factors the host will either checkstop and reboot, or hang indefinitely. The following stacktrace was pulled from an instance where the host simply hung after starting a fourth virtual machine.
Command line:
qemu-system-ppc64 --enable-kvm -name pbuild-vnode001 -M pseries -cpu host -smp 14,cores=14,threads=1,sockets=1 -m 64G -realtime mlock=on -kernel vmlinux-4.7.0-1-powerpc64le -initrd initrd.img-4.7.0-1-powerpc64le
Lockup trace:
[ 527.393933] KVM guest htab at c000003ae4000000 (order 29), LPID 4
[ 574.637695] INFO: rcu_sched self-detected stall on CPU
[ 574.637799] 112-...: (5249 ticks this GP) idle=699/140000000000001/0 softirq=5358/5382 fqs=5072
[ 574.637877] (t=5250 jiffies g=19853 c=19852 q=64401)
[ 574.637947] Task dump for CPU 112:
[ 574.637982] qemu-system-ppc R running task 0 12037 11828 0x00040004
[ 574.638051] Call Trace:
[ 574.638081] [c000001c1cddb430] [c0000000000f2710] sched_show_task+0xe0/0x180 (unreliable)
[ 574.638164] [c000001c1cddb4a0] [c0000000001326f4] rcu_dump_cpu_stacks+0xe4/0x150
[ 574.638246] [c000001c1cddb4f0] [c000000000137a04] rcu_check_callbacks+0x6b4/0x9c0
[ 574.638328] [c000001c1cddb610] [c00000000013f7c4] update_process_times+0x54/0xa0
[ 574.638409] [c000001c1cddb640] [c000000000156c28] tick_sched_handle.isra.5+0x48/0xe0
[ 574.638489] [c000001c1cddb680] [c000000000156d24] tick_sched_timer+0x64/0xd0
[ 574.638602] [c000001c1cddb6c0] [c000000000140274] __hrtimer_run_queues+0x124/0x420
[ 574.638683] [c000001c1cddb750] [c00000000014123c] hrtimer_interrupt+0xec/0x2c0
[ 574.638765] [c000001c1cddb810] [c00000000001fe5c] __timer_interrupt+0x8c/0x270
[ 574.638847] [c000001c1cddb860] [c00000000002053c] timer_interrupt+0x9c/0xe0
[ 574.638915] [c000001c1cddb890] [c000000000002750] decrementer_common+0x150/0x180
[ 574.639001] --- interrupt: 901 at kvmppc_hv_get_dirty_log+0x1c4/0x570 [kvm_hv]
[ 574.639001] LR = kvmppc_hv_get_dirty_log+0x1f8/0x570 [kvm_hv]
[ 574.639114] [c000001c1cddbc30] [d00000001a524980] kvm_vm_ioctl_get_dirty_log_hv+0xd0/0x170 [kvm_hv]
[ 574.639209] [c000001c1cddbc80] [d00000001a4d4140] kvm_vm_ioctl_get_dirty_log+0x40/0x60 [kvm]
[ 574.639291] [c000001c1cddbcb0] [d00000001a4ca3cc] kvm_vm_ioctl+0x3fc/0x760 [kvm]
[ 574.639372] [c000001c1cddbd40] [c0000000002d9e18] do_vfs_ioctl+0xd8/0x8e0
[ 574.639442] [c000001c1cddbde0] [c0000000002da6f4] SyS_ioctl+0xd4/0xf0
[ 574.639512] [c000001c1cddbe30] [c000000000009260] system_call+0x38/0x108
[ 580.601573] NMI watchdog: BUG: soft lockup - CPU#112 stuck for 22s! [qemu-system-ppc:12037]
[ 580.601655] Modules linked in: xt_tcpudp(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) ext4(E) ecb(E) crc16(E) jbd2(E) mbcache(E) tun(E) btrfs(E) crc32c_generic(E) raid6_pq(E) xor(E) dm_crypt(E) xts(E) gf128mul(E) algif_skcipher(E) af_alg(E) dm_mod(E) bonding(E) cpufreq_stats(E) iptable_filter(E) ip_tables(E) x_tables(E) bridge(E) stp(E) llc(E) ipmi_devintf(E) ipmi_msghandler(E) i2c_dev(E) fuse(E) raid1(E) md_mod(E) ses(E) sd_mod(E) enclosure(E) sg(E) binfmt_misc(E) radeon(E) ttm(E) drm_kms_helper(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) drm(E) snd_hda_codec(E) snd_hda_core(E) snd_hwdep(E) snd_pcm(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) snd_timer(E) evdev(E) i2c_algo_bit(E) snd(E) soundcore(E) at24(E) ahci(E) mpt3sas(E) nvmem_core(E) libahci(E) raid_class(E) scsi_transport_sas(E) powernv_rng(E) rng_core(E) uinput(E) kvm_hv(E) kvm(E) ib_srp(E) scsi_transport_srp(E) ofpart(E) powernv_flash(E) mtd(E) nfsd(E) opal_prd(E) auth_rpcgss(E) parport_pc(E) lp(E) parport(E) autofs4(E) nfsv3(E) nfs_acl(E) nfs(E) lockd(E) grace(E) sunrpc(E) fscache(E) ib_ipoib(E) ib_umad(E) rdma_ucm(E) ib_uverbs(E) rdma_cm(E) iw_cm(E) ib_cm(E) ib_sa(E) configfs(E) hid_generic(E) usbhid(E) hid(E) xhci_pci(E) xhci_hcd(E) usbcore(E) tg3(E) usb_common(E) ptp(E) pps_core(E) libphy(E) ib_mthca(E) ib_mad(E) ib_core(E) ib_addr(E)
[ 580.603295] CPU: 112 PID: 12037 Comm: qemu-system-ppc Tainted: G E 4.6.0-2-powerpc64le #1 Debian 4.6.3-1
[ 580.603386] task: c000001f706f0180 ti: c000001c1cdd8000 task.ti: c000001c1cdd8000
[ 580.603456] NIP: d00000001a52cb54 LR: d00000001a52cb88 CTR: 0000000000000000
[ 580.603524] REGS: c000001c1cddb900 TRAP: 0901 Tainted: G E (4.6.0-2-powerpc64le Debian 4.6.3-1)
[ 580.603613] MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 24048444 XER: 00000000
[ 580.603784] CFAR: d00000001a52cb18 SOFTE: 1
GPR00: d00000001a52cb88 c000001c1cddbb80 d00000001a53c580 40016e77790fe611
GPR04: 0000000beceac194 00000000019a4544 0000000000000001 0000000000000000
GPR08: 4000000000000000 0000000000000000 0000000000000001 8000000101a9b824
GPR12: c00000000009aea0 c00000000fbbf000
[ 580.604205] NIP [d00000001a52cb54] kvmppc_hv_get_dirty_log+0x1c4/0x570 [kvm_hv]
[ 580.604274] LR [d00000001a52cb88] kvmppc_hv_get_dirty_log+0x1f8/0x570 [kvm_hv]
[ 580.604341] Call Trace:
[ 580.604366] [c000001c1cddbb80] [d00000001a52cb88] kvmppc_hv_get_dirty_log+0x1f8/0x570 [kvm_hv] (unreliable)
[ 580.604469] [c000001c1cddbc30] [d00000001a524980] kvm_vm_ioctl_get_dirty_log_hv+0xd0/0x170 [kvm_hv]
[ 580.604562] [c000001c1cddbc80] [d00000001a4d4140] kvm_vm_ioctl_get_dirty_log+0x40/0x60 [kvm]
[ 580.604685] [c000001c1cddbcb0] [d00000001a4ca3cc] kvm_vm_ioctl+0x3fc/0x760 [kvm]
[ 580.604765] [c000001c1cddbd40] [c0000000002d9e18] do_vfs_ioctl+0xd8/0x8e0
[ 580.604837] [c000001c1cddbde0] [c0000000002da6f4] SyS_ioctl+0xd4/0xf0
[ 580.604906] [c000001c1cddbe30] [c000000000009260] system_call+0x38/0x108
[ 580.604977] Instruction dump:
[ 580.605012] 2ba90003 419effc4 7fa97000 419effbc 81314310 2f890000 409effb0 7d00b0a8
[ 580.605127] 7d09a039 40820014 7d08a378 7d00b1ad <41e20008> 7e89a378 4c00012c 2fa90000
[ 637.648374] INFO: rcu_sched self-detected stall on CPU
[ 637.648473] 112-...: (21002 ticks this GP) idle=699/140000000000001/0 softirq=5358/5382 fqs=20825
[ 637.648554] (t=21003 jiffies g=19853 c=19852 q=260741)
[ 637.648612] Task dump for CPU 112:
[ 637.648646] qemu-system-ppc R running task 0 12037 11828 0x00040004
[ 637.648719] Call Trace:
[ 637.648745] [c000001c1cddb430] [c0000000000f2710] sched_show_task+0xe0/0x180 (unreliable)
[ 637.648825] [c000001c1cddb4a0] [c0000000001326f4] rcu_dump_cpu_stacks+0xe4/0x150
[ 637.648903] [c000001c1cddb4f0] [c000000000137a04] rcu_check_callbacks+0x6b4/0x9c0
[ 637.648985] [c000001c1cddb610] [c00000000013f7c4] update_process_times+0x54/0xa0
[ 637.649067] [c000001c1cddb640] [c000000000156c28] tick_sched_handle.isra.5+0x48/0xe0
[ 637.649147] [c000001c1cddb680] [c000000000156d24] tick_sched_timer+0x64/0xd0
[ 637.649216] [c000001c1cddb6c0] [c000000000140274] __hrtimer_run_queues+0x124/0x420
[ 637.649296] [c000001c1cddb750] [c00000000014123c] hrtimer_interrupt+0xec/0x2c0
[ 637.649374] [c000001c1cddb810] [c00000000001fe5c] __timer_interrupt+0x8c/0x270
[ 637.649456] [c000001c1cddb860] [c00000000002053c] timer_interrupt+0x9c/0xe0
[ 637.649525] [c000001c1cddb890] [c000000000002750] decrementer_common+0x150/0x180
[ 637.649645] --- interrupt: 901 at kvmppc_hv_get_dirty_log+0x1c4/0x570 [kvm_hv]
[ 637.649645] LR = kvmppc_hv_get_dirty_log+0x1f8/0x570 [kvm_hv]
[ 637.649760] [c000001c1cddbc30] [d00000001a524980] kvm_vm_ioctl_get_dirty_log_hv+0xd0/0x170 [kvm_hv]
[ 637.649854] [c000001c1cddbc80] [d00000001a4d4140] kvm_vm_ioctl_get_dirty_log+0x40/0x60 [kvm]
[ 637.649936] [c000001c1cddbcb0] [d00000001a4ca3cc] kvm_vm_ioctl+0x3fc/0x760 [kvm]
[ 637.650014] [c000001c1cddbd40] [c0000000002d9e18] do_vfs_ioctl+0xd8/0x8e0
[ 637.650083] [c000001c1cddbde0] [c0000000002da6f4] SyS_ioctl+0xd4/0xf0
[ 637.650153] [c000001c1cddbe30] [c000000000009260] system_call+0x38/0x108
|