blob: 19a9a9a6aefc696328aa3d134384a6dbda805b9e (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
qemu 2.7.0 segfaults in qemu_co_queue_run_restart()
I've been experiencing frequent segfaults lately with qemu 2.7.0 running Ubuntu 16.04 guests. The crash usually happens in qemu_co_queue_run_restart(). I haven't seen this so far with any other guests or distros.
Here is one back trace I obtained from one of the crashing VMs.
-------------------------------------------------------------------------------------------------
(gdb) bt
#0 qemu_co_queue_run_restart (co=0x7fba8ff05aa0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine-lock.c:59
#1 0x000055c1656f39a9 in qemu_coroutine_enter (co=0x7fba8ff05aa0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine.c:119
#2 0x000055c1656f3e74 in qemu_co_queue_run_restart (co=0x7fba8dd20430) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine-lock.c:60
#3 0x000055c1656f39a9 in qemu_coroutine_enter (co=0x7fba8dd20430) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine.c:119
#4 0x000055c1656f3e74 in qemu_co_queue_run_restart (co=0x7fba8dd14ea0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine-lock.c:60
#5 0x000055c1656f39a9 in qemu_coroutine_enter (co=0x7fba8dd14ea0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine.c:119
#6 0x000055c1656f3e74 in qemu_co_queue_run_restart (co=0x7fba80c11dc0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine-lock.c:60
#7 0x000055c1656f39a9 in qemu_coroutine_enter (co=0x7fba80c11dc0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine.c:119
#8 0x000055c1656f3e74 in qemu_co_queue_run_restart (co=0x7fba8dd0bd70) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine-lock.c:60
#9 0x000055c1656f39a9 in qemu_coroutine_enter (co=0x7fba8dd0bd70) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine.c:119
#10 0x000055c1656f3fa0 in qemu_co_enter_next (queue=queue@entry=0x55c1669e75e0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/util/qemu-coroutine-lock.c:106
#11 0x000055c165692060 in timer_cb (blk=0x55c1669e7590, is_write=<optimized out>) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/block/throttle-groups.c:400
#12 0x000055c16564f615 in timerlist_run_timers (timer_list=0x55c166a53e80) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/qemu-timer.c:528
#13 0x000055c16564f679 in timerlistgroup_run_timers (tlg=tlg@entry=0x55c167c81cf8) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/qemu-timer.c:564
#14 0x000055c16564ff47 in aio_dispatch (ctx=ctx@entry=0x55c167c81bb0) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/aio-posix.c:357
#15 0x000055c1656500e8 in aio_poll (ctx=0x55c167c81bb0, blocking=<optimized out>) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/aio-posix.c:479
#16 0x000055c1654b1c79 in iothread_run (opaque=0x55c167c81960) at /build/pb-qemu-pssKUp/pb-qemu-2.7.0/iothread.c:46
#17 0x00007fbc4b64f0a4 in allocate_stack (stack=<synthetic pointer>, pdp=<synthetic pointer>, attr=0x0) at allocatestack.c:416
#18 __pthread_create_2_1 (newthread=<error reading variable: Cannot access memory at address 0xffffffffffffff48>, attr=<error reading variable: Cannot access memory at address 0xffffffffffffff40>,
start_routine=<error reading variable: Cannot access memory at address 0xffffffffffffff58>, arg=<error reading variable: Cannot access memory at address 0xffffffffffffff50>) at pthread_create.c:539
Backtrace stopped: Cannot access memory at address 0x8
-------------------------------------------------------------------------------------------------
The code that crashes is this
-------------------------------------------------------------------------------------------------
void qemu_co_queue_run_restart(Coroutine *co)
{
Coroutine *next;
trace_qemu_co_queue_run_restart(co);
while ((next = QSIMPLEQ_FIRST(&co->co_queue_wakeup))) {
QSIMPLEQ_REMOVE_HEAD(&co->co_queue_wakeup, co_queue_next); <--- Crash occurs here this time
qemu_coroutine_enter(next);
}
}
-------------------------------------------------------------------------------------------------
Expanding the macro QSIMPLEQ_REMOVE_HEAD gives us
-------------------------------------------------------------------------------------------------
#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \
if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\
(head)->sqh_last = &(head)->sqh_first; \
} while (/*CONSTCOND*/0)
-------------------------------------------------------------------------------------------------
which corrsponds to
-------------------------------------------------------------------------------------------------
if (((&co->co_queue_wakeup)->sqh_first = (&co->co_queue_wakeup)->sqh_first->co_queue_next.sqe_next) == NULL)\
(&co->co_queue_wakeup)->sqh_last = &(&co->co_queue_wakeup)->sqh_first;
-------------------------------------------------------------------------------------------------
Debugging the list we see
-------------------------------------------------------------------------------------------------
(gdb) print *(&co->co_queue_wakeup->sqh_first)
$6 = (struct Coroutine *) 0x1000
(gdb) print *(&co->co_queue_wakeup->sqh_first->co_queue_next)
Cannot access memory at address 0x1030
-------------------------------------------------------------------------------------------------
So the data in co->co_queue_wakeup->sqh_first is corrupted and represents an invalid address. Any idea why is that?
|