1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
other: 0.806
device: 0.772
KVM: 0.767
vnc: 0.742
permissions: 0.735
graphic: 0.730
performance: 0.717
debug: 0.706
network: 0.679
socket: 0.673
boot: 0.660
files: 0.641
semantic: 0.639
PID: 0.608
qemu segfaults in virtio-scsi driver if underlying device returns -EIO
Reported downstream in Fedora: https://bugzilla.redhat.com/show_bug.cgi?id=1650975
Using qemu built from git and a reasonably recent nbdkit, the following command
injects -EIO errors into the block device that virtio-scsi reads from:
$ nbdkit --filter=error memory size=64M error-rate=100% \
--run 'x86_64-softmmu/qemu-system-x86_64 -device virtio-scsi,id=scsi -drive file=$nbd,format=raw,id=hd0,if=none -device scsi-hd,drive=hd0'
nbdkit: memory[1]: error: injecting EIO error into pread
nbdkit: memory[1]: error: injecting EIO error into pread
qemu-system-x86_64: hw/scsi/scsi-bus.c:1374: scsi_req_complete: Assertion `req->status == -1' failed.
The stack trace is:
Thread 5 (Thread 0x7f33e1f8b700 (LWP 10474)):
#0 0x00007f33fe0bf371 in __GI___poll (fds=0x559b07199490, nfds=1, timeout=-1)
at ../sysdeps/unix/sysv/linux/poll.c:29
#1 0x00007f34061df5e6 in () at /lib64/libglib-2.0.so.0
#2 0x00007f34061df710 in g_main_context_iteration ()
at /lib64/libglib-2.0.so.0
#3 0x00007f34061df761 in () at /lib64/libglib-2.0.so.0
#4 0x00007f34062086ea in () at /lib64/libglib-2.0.so.0
#5 0x00007f33fe19b58e in start_thread (arg=<optimized out>)
at pthread_create.c:486
#6 0x00007f33fe0ca593 in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 4 (Thread 0x7f33e3fff700 (LWP 10473)):
#0 0x00007f33fe1a4a8d in __lll_lock_wait ()
at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:103
#1 0x00007f33fe19ddf8 in __GI___pthread_mutex_lock (mutex=mutex@entry=0x559b054697a0 <qemu_global_mutex>) at ../nptl/pthread_mutex_lock.c:78
#2 0x0000559b04f6b103 in qemu_mutex_lock_impl (mutex=0x559b054697a0 <qemu_global_mutex>, file=0x559b04f87041 "/home/rjones/d/qemu/exec.c", line=3197)
at util/qemu-thread-posix.c:66
#3 0x0000559b04b722ee in qemu_mutex_lock_iothread_impl (file=file@entry=0x559b04f87041 "/home/rjones/d/qemu/exec.c", line=line@entry=3197)
at /home/rjones/d/qemu/cpus.c:1845
#4 0x0000559b04b31859 in prepare_mmio_access (mr=<optimized out>, mr=<optimized out>) at /home/rjones/d/qemu/exec.c:3197
#5 0x0000559b04b381d4 in address_space_ldub (as=<optimized out>, addr=<optimized out>, attrs=..., result=result@entry=0x0)
at /home/rjones/d/qemu/memory_ldst.inc.c:188
#6 0x0000559b04c61cd0 in helper_inb (env=<optimized out>, port=<optimized out>) at /home/rjones/d/qemu/target/i386/cpu.h:1846
#7 0x00007f33e889dc3e in code_gen_buffer ()
#8 0x0000559b04bb3b87 in cpu_tb_exec (itb=<optimized out>, cpu=0x7f33e8876100 <code_gen_buffer+684243>) at /home/rjones/d/qemu/accel/tcg/cpu-exec.c:171
#9 0x0000559b04bb3b87 in cpu_loop_exec_tb (tb_exit=<synthetic pointer>, last_tb=<synthetic pointer>, tb=<optimized out>, cpu=0x7f33e8876100 <code_gen_buffer+684243>) at /home/rjones/d/qemu/accel/tcg/cpu-exec.c:615
#10 0x0000559b04bb3b87 in cpu_exec (cpu=cpu@entry=0x559b05db57a0)
at /home/rjones/d/qemu/accel/tcg/cpu-exec.c:725
#11 0x0000559b04b7088f in tcg_cpu_exec (cpu=0x559b05db57a0)
at /home/rjones/d/qemu/cpus.c:1425
#12 0x0000559b04b72c03 in qemu_tcg_cpu_thread_fn (arg=0x559b05db57a0)
at /home/rjones/d/qemu/cpus.c:1729
#13 0x0000559b04b72c03 in qemu_tcg_cpu_thread_fn (arg=arg@entry=0x559b05db57a0)
at /home/rjones/d/qemu/cpus.c:1703
#14 0x0000559b04f6afba in qemu_thread_start (args=<optimized out>)
at util/qemu-thread-posix.c:498
#15 0x00007f33fe19b58e in start_thread (arg=<optimized out>)
at pthread_create.c:486
#16 0x00007f33fe0ca593 in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 3 (Thread 0x7f33e178a700 (LWP 10475)):
#0 0x00007f33fe0bf371 in __GI___poll (fds=0x559b071aa760, nfds=2, timeout=-1)
at ../sysdeps/unix/sysv/linux/poll.c:29
#1 0x00007f34061df5e6 in () at /lib64/libglib-2.0.so.0
#2 0x00007f34061df9a2 in g_main_loop_run () at /lib64/libglib-2.0.so.0
#3 0x00007f34032ca90a in () at /lib64/libgio-2.0.so.0
#4 0x00007f34062086ea in () at /lib64/libglib-2.0.so.0
#5 0x00007f33fe19b58e in start_thread (arg=<optimized out>)
at pthread_create.c:486
#6 0x00007f33fe0ca593 in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 2 (Thread 0x7f33eb050700 (LWP 10471)):
#0 0x00007f33fe1a5400 in __GI___nanosleep (requested_time=0x7f33eb04d270, remaining=0x7f33eb04d280) at ../sysdeps/unix/sysv/linux/nanosleep.c:28
#1 0x00007f3406209e17 in g_usleep () at /lib64/libglib-2.0.so.0
#2 0x0000559b04f7cb80 in call_rcu_thread (opaque=opaque@entry=0x0)
at util/rcu.c:253
#3 0x0000559b04f6afba in qemu_thread_start (args=<optimized out>)
at util/qemu-thread-posix.c:498
#4 0x00007f33fe19b58e in start_thread (arg=<optimized out>)
at pthread_create.c:486
#5 0x00007f33fe0ca593 in clone ()
at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
Thread 1 (Thread 0x7f33eb9b42c0 (LWP 10470)):
#0 0x00007f33fe00553f in __GI_raise (sig=sig@entry=6)
at ../sysdeps/unix/sysv/linux/raise.c:50
#1 0x00007f33fdfef895 in __GI_abort () at abort.c:79
#2 0x00007f33fdfef769 in __assert_fail_base (fmt=0x7f33fe156ea8 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x559b050203cf "req->status == -1", file=0x559b050203aa "hw/scsi/scsi-bus.c", line=1374, function=0x559b05020dc0 <__PRETTY_FUNCTION__.32414> "scsi_req_complete") at assert.c:92
#3 0x00007f33fdffd9f6 in __GI___assert_fail (assertion=assertion@entry=0x559b050203cf "req->status == -1", file=file@entry=0x559b050203aa "hw/scsi/scsi-bus.c", line=line@entry=1374, function=function@entry=0x559b05020dc0 <__PRETTY_FUNCTION__.32414> "scsi_req_complete") at assert.c:101
#4 0x0000559b04da23c0 in scsi_req_complete (req=<optimized out>, status=<optimized out>) at hw/scsi/scsi-bus.c:1374
#5 0x0000559b04d9cc60 in scsi_dma_complete_noio (r=0x559b069c3600, ret=<optimized out>) at hw/scsi/scsi-disk.c:281
#6 0x0000559b04d9cd0f in scsi_dma_complete (opaque=0x559b069c3600, ret=-5)
at hw/scsi/scsi-disk.c:302
#7 0x0000559b04c91607 in dma_complete (ret=-5, dbs=0x559b07808000)
at dma-helpers.c:116
#8 0x0000559b04c91607 in dma_blk_cb (opaque=0x559b07808000, ret=-5)
at dma-helpers.c:138
#9 0x0000559b04ec411e in blk_aio_complete (acb=0x559b07514fa0)
at block/block-backend.c:1345
#10 0x0000559b04f7e32b in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:116
#11 0x00007f33fe01b200 in __start_context ()
at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91
#12 0x00007ffc0896b040 in ()
#13 0x0000000000000000 in ()
I bisected this to:
40dce4ee61c68395f6d463fae792f61b7c003bce is the first bad commit
commit 40dce4ee61c68395f6d463fae792f61b7c003bce
Author: Paolo Bonzini <email address hidden>
Date: Sat Oct 13 11:52:34 2018 +0200
scsi-disk: fix rerror/werror=ignore
rerror=ignore was returning true from scsi_handle_rw_error but the callers were not
calling scsi_req_complete when rerror=ignore returns true (this is the correct thing
to do when true is returned after executing a passthrough command). Fix this by
calling it in scsi_handle_rw_error.
Signed-off-by: Paolo Bonzini <email address hidden>
:040000 040000 311386b9b91d77840a849459ab6ae41a37fd7f42 8adcda67d7487bcc18966f096c9923da3b8dc0b9 M hw
Kevin suggested this change, which works for me:
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 6eb258d3f3..0e9027c8f3 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -482,7 +482,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed)
if (action == BLOCK_ERROR_ACTION_STOP) {
scsi_req_retry(&r->req);
}
- return false;
+ return true;
}
static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
The fix suggested in comment 1 is now in QEMU master as commit 1c7f618f689b0b5; it is included in 3.1.0-rc3 and will be in the final 3.1 release.
|