1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
id = 1738
title = "qemu-system-x86_64 crash during kernel PCI init with large number of busses"
state = "opened"
created_at = "2023-06-27T17:26:05.642Z"
closed_at = "n/a"
labels = ["device: PCI"]
url = "https://gitlab.com/qemu-project/qemu/-/issues/1738"
host-os = "Ubuntu 22.04"
host-arch = "x86_64"
qemu-version = "8.0.2"
guest-os = "Linux"
guest-arch = "x86_64"
description = """When booting a Linux kernel under qemu-system-x86_64 (TCG) with a large number of PCI busses (25+), QEMU crashes with an invalid memory access during the kernel's PCI init phase. The failure rate is not 100%: some kernel boots do succeed, but the failure rate increases as the number of PCI busses increases. Note that no initrd is needed; the crash happens before the kernel even gets to the point of trying to mount root."""
reproduce = """Launch QEMU using the command line above along with a 4.19.x kernel image (5.x has not been tested). It may take a few tries, but within about 20 boot attempts QEMU will crash at least once."""
additional = """Final kernel logs before crash:
```
...
[ 1.413615] ACPI: Added _OSI(Module Device)
[ 1.413947] ACPI: Added _OSI(Processor Device)
[ 1.414262] ACPI: Added _OSI(3.0 _SCP Extensions)
[ 1.414421] ACPI: Added _OSI(Processor Aggregator Device)
[ 1.414922] ACPI: Added _OSI(Linux-Dell-Video)
[ 1.415445] ACPI: Added _OSI(Linux-Lenovo-NV-HDMI-Audio)
[ 1.444489] ACPI: 1 ACPI AML tables successfully acquired and loaded
[ 1.468218] ACPI: Interpreter enabled
[ 1.469897] ACPI: (supports S0 S3 S4 S5)
[ 1.470200] ACPI: Using IOAPIC for interrupt routing
[ 1.471811] PCI: Using host bridge windows from ACPI; if necessary, use "pci=nocrs" and report a bug
[ 1.474421] ACPI: Enabled 2 GPEs in block 00 to 3F
[ 1.536854] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
[ 1.537996] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI]
[ 1.540988] acpi PNP0A08:00: _OSC: platform does not support [LTR]
[ 1.542232] acpi PNP0A08:00: _OSC: OS now controls [PME AER PCIeCapability]
[ 1.546310] PCI host bridge to bus 0000:00
[ 1.546650] pci_bus 0000:00: root bus resource [io 0x0000-0x0cf7 window]
[ 1.547471] pci_bus 0000:00: root bus resource [io 0x0d00-0xffff window]
[ 1.548039] pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000bffff window]
[ 1.548421] pci_bus 0000:00: root bus resource [mem 0x80000000-0xafffffff window]
[ 1.549086] pci_bus 0000:00: root bus resource [mem 0xc0000000-0xfebfffff window]
[ 1.549945] pci_bus 0000:00: root bus resource [mem 0x280000000-0xa7fffffff window]
[ 1.550994] pci_bus 0000:00: root bus resource [bus 00-ff]
<...crash...>
```
QEMU backtrace:
```
$ gdb build/qemu-system-x86_64 core.3475232
<...>
Reading symbols from build/qemu-system-x86_64...
[New LWP 3475243]
[New LWP 3475244]
[New LWP 3475241]
[New LWP 3475238]
[New LWP 3475245]
[New LWP 3475239]
[New LWP 3475246]
[New LWP 3475240]
[New LWP 3475232]
[New LWP 3475242]
[New LWP 3475236]
[New LWP 3475247]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `build/qemu-system-x86_64 -m 8192 -smp cpus=10,threads=2 -nographic -machine q35'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x0000556065897e0e in memory_region_dispatch_write (mr=mr@entry=0x0, addr=addr@entry=768, data=data@entry=253,
op=op@entry=MO_32, attrs=...) at ../softmmu/memory.c:1497
1497\t if (mr->alias) {
[Current thread is 1 (Thread 0x7fe2e951d640 (LWP 3475243))]
(gdb) bt full
#0 0x0000556065897e0e in memory_region_dispatch_write
(mr=mr@entry=0x0, addr=addr@entry=768, data=data@entry=253, op=op@entry=MO_32, attrs=...) at ../softmmu/memory.c:1497
size = <optimized out>
#1 0x00005560659112c2 in io_writex
(env=env@entry=0x556066bbd5d0, full=0x7fe08401ec70, mmu_idx=mmu_idx@entry=2, val=val@entry=253, addr=addr@entry=18446744073699050240, retaddr=retaddr@entry=140611404753775, op=MO_32) at ../accel/tcg/cputlb.c:1430
_iothread_lock_auto = 0x1
cpu = 0x556066bbb1e0
mr_offset = 768
section = 0x7fe078d7d570
mr = 0x0
r = <optimized out>
#2 0x0000556065915f14 in store_helper
(op=MO_32, retaddr=140611404753775, oi=<optimized out>, val=<optimized out>, addr=18446744073699050240, env=0x556066bbd5d0)
at ../accel/tcg/cputlb.c:2454
full = <optimized out>
need_swap = false
a_bits = <optimized out>
mmu_idx = 2
tlb_addr = <optimized out>
haddr = <optimized out>
size = 4
index = <optimized out>
entry = 0x7fe08401bc40
#3 full_le_stl_mmu (env=0x556066bbd5d0, addr=18446744073699050240, val=253, oi=<optimized out>, retaddr=140611404753775)
at ../accel/tcg/cputlb.c:2542
#4 0x00007fe2a4d4eb6f in code_gen_buffer ()
#5 0x00005560659065bb in cpu_tb_exec
(cpu=cpu@entry=0x556066bbb1e0, itb=itb@entry=0x7fe2a4d4e9c0 <code_gen_buffer+13953427>, tb_exit=tb_exit@entry=0x7fe2e951c758)
at ../accel/tcg/cpu-exec.c:460
env = 0x556066bbd5d0
ret = <optimized out>
last_tb = <optimized out>
tb_ptr = 0x7fe2a4d4ea80 <code_gen_buffer+13953619>
__PRETTY_FUNCTION__ = "cpu_tb_exec"
#6 0x0000556065906ab6 in cpu_loop_exec_tb
(tb_exit=0x7fe2e951c758, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fe2a4d4e9c0 <code_gen_buffer+13953427>, cpu=0x556066bbb1e0) at ../accel/tcg/cpu-exec.c:893
insns_left = <optimized out>
__PRETTY_FUNCTION__ = "cpu_loop_exec_tb"
tb = 0x7fe2a4d4e9c0 <code_gen_buffer+13953427>
flags = <optimized out>
cflags = 4280811520
cs_base = <optimized out>
pc = <optimized out>
last_tb = <optimized out>
tb_exit = 0
--Type <RET> for more, q to quit, c to continue without paging--
ret = <optimized out>
#7 cpu_exec_loop (cpu=cpu@entry=0x556066bbb1e0, sc=sc@entry=0x7fe2e951c7f0) at ../accel/tcg/cpu-exec.c:1013
tb = 0x7fe2a4d4e9c0 <code_gen_buffer+13953427>
flags = <optimized out>
cflags = 4280811520
cs_base = <optimized out>
pc = <optimized out>
last_tb = <optimized out>
tb_exit = 0
ret = <optimized out>
#8 0x0000556065907311 in cpu_exec_setjmp (cpu=cpu@entry=0x556066bbb1e0, sc=sc@entry=0x7fe2e951c7f0) at ../accel/tcg/cpu-exec.c:1043
__func__ = "cpu_exec_setjmp"
#9 0x00005560659079f0 in cpu_exec (cpu=cpu@entry=0x556066bbb1e0) at ../accel/tcg/cpu-exec.c:1069
ret = <optimized out>
sc = {diff_clk = 0, last_cpu_icount = 0, realtime_clock = 0}
#10 0x000055606592a854 in tcg_cpus_exec (cpu=cpu@entry=0x556066bbb1e0) at ../accel/tcg/tcg-accel-ops.c:81
ret = <optimized out>
__PRETTY_FUNCTION__ = "tcg_cpus_exec"
#11 0x000055606592a9a7 in mttcg_cpu_thread_fn (arg=arg@entry=0x556066bbb1e0) at ../accel/tcg/tcg-accel-ops-mttcg.c:95
r = <optimized out>
force_rcu = {notifier = {notify = 0x55606592aac0 <mttcg_force_rcu>, node = {le_next = 0x0, le_prev = 0x7fe2e951d4a0}}, cpu = 0x556066bbb1e0}
cpu = 0x556066bbb1e0
__PRETTY_FUNCTION__ = "mttcg_cpu_thread_fn"
__func__ = "mttcg_cpu_thread_fn"
#12 0x0000556065aa2e91 in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:541
__cancel_buf = {__cancel_jmp_buf = {{__cancel_jmp_buf = {140612553791040, -3809744250012005023, 93872529245600, 25, 140612607756368, 140729970282144, -7051494707616903839, -3809738403745854111}, __mask_was_saved = 0}}, __pad = {0x7fe2e951c970, 0x0, 0x0, 0x0}}
__cancel_routine = 0x556065aa2ee0 <qemu_thread_atexit_notify>
__not_first_call = <optimized out>
start_routine = 0x55606592a8a0 <mttcg_cpu_thread_fn>
arg = 0x556066bbb1e0
r = <optimized out>
#13 0x00007fe2ec894b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
ret = <optimized out>
pd = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140729970281792, 7053160723592154465, 140612553791040, 25, 140612607756368, 140729970282144, -7051494707570766495, -7051505217351676575}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
#14 0x00007fe2ec926a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
```"""
|