summary refs log tree commit diff stats
path: root/results/scraper/launchpad/1906193
blob: e3d3ea6f3394f937aebcfbb461033dd011a3592a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
riscv32 user mode emulation: fork return values broken

When running in a chroot with riscv32 (on x86_64; qemu git master as of today):

The following short program forks; the child immediately returns with exit(42). The parent checks for the return value - and obtains 40!

gcc-10.2

===============================================
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/wait.h>

main(c, v)
     int c;
     char **v;
{
  pid_t pid, p;
  int s, i, n;

  s = 0;
  pid = fork();
  if (pid == 0)
    exit(42);

  /* wait for the process */
  p = wait(&s);
  if (p != pid)
    exit (255);

  if (WIFEXITED(s))
  {
     int r=WEXITSTATUS(s);
     if (r!=42) {
      printf("child wants to return %i (0x%X), parent received %i (0x%X), difference %i\n",42,42,r,r,r-42);
     }
  }
}
===============================================

(riscv-ilp32 chroot) farino /tmp # ./wait-test-short 
child wants to return 42 (0x2A), parent received 40 (0x28), difference -2

===============================================
(riscv-ilp32 chroot) farino /tmp # gcc --version
gcc (Gentoo 10.2.0-r1 p2) 10.2.0
Copyright (C) 2020 Free Software Foundation, Inc.
Dies ist freie Software; die Kopierbedingungen stehen in den Quellen. Es
gibt KEINE Garantie; auch nicht für MARKTGÄNGIGKEIT oder FÜR SPEZIELLE ZWECKE.

(riscv-ilp32 chroot) farino /tmp # ld --version
GNU ld (Gentoo 2.34 p6) 2.34.0
Copyright (C) 2020 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or (at your option) a later version.
This program has absolutely no warranty.

This is the (statically linked) binary resulting from the source; with it the problem can be demonstrated "standalone", without any other rv32 libraries or a complete chroot, just running the binary with qemu-riscv32.

Generated with

(riscv-ilp32 chroot) farino /tmp # gcc -static -o wait-test-short -g wait-test-short.c


I can confirm that the same binary works fine with qemu system emulation:

(riscv-ilp32 qemu) (none) /tmp # ./wait-test-short 
(riscv-ilp32 qemu) (none) /tmp # 


Here's the (abbreviated) output of strace'ing qemu: 

farino ~ # strace -f /usr/bin/qemu-riscv32 /chroot/riscv-ilp32/tmp/wait-test-short
execve("/usr/bin/qemu-riscv32", ["/usr/bin/qemu-riscv32", "/chroot/riscv-ilp32/tmp/wait-tes"...], 0x7ffd95fb1330 /* 40 vars */) = 0

[...]

[pid 16569] uname({sysname="Linux", nodename="farino", ...}) = 0
[pid 16569] lstat("/chroot", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
[pid 16569] lstat("/chroot/riscv-ilp32", {st_mode=S_IFDIR|S_ISGID|0755, st_size=4096, ...}) = 0
[pid 16569] lstat("/chroot/riscv-ilp32/tmp", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0
[pid 16569] lstat("/chroot/riscv-ilp32/tmp/wait-test-short", {st_mode=S_IFREG|0755, st_size=445632, ...}) = 0
[pid 16569] mmap(0x413f1000, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x413f1000
[pid 16569] mprotect(0x413eb000, 8192, PROT_READ) = 0
[pid 16569] rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0
[pid 16569] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x1339710) = 16571
strace: Process 16571 attached
[pid 16571] set_robust_list(0x1339720, 24 <unfinished ...>
[pid 16569] rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0
[pid 16571] <... set_robust_list resumed>) = 0
[pid 16569] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid 16571] rt_sigprocmask(SIG_SETMASK, ~[ILL FPE SEGV RTMIN RT_1], ~[KILL STOP RTMIN RT_1], 8) = 0
[pid 16571] rt_sigprocmask(SIG_BLOCK, ~[], ~[ILL FPE KILL SEGV STOP RTMIN RT_1], 8) = 0
[pid 16571] clone(child_stack=0x7fe5b73871f0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[16572], tls=0x7fe5b7387640, child_tidptr=0x7fe5b7387910) = 16572
[pid 16571] rt_sigprocmask(SIG_SETMASK, ~[ILL FPE KILL SEGV STOP RTMIN RT_1], NULL, 8) = 0
[pid 16571] rt_sigprocmask(SIG_SETMASK, ~[KILL STOP RTMIN RT_1], NULL, 8) = 0
[pid 16571] gettid()                    = 16571
[pid 16571] rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0
[pid 16571] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid 16569] waitid(P_ALL, -1,  <unfinished ...>
[pid 16571] exit_group(42)              = ?
strace: Process 16572 attached
[pid 16572] +++ exited with 42 +++
[pid 16571] +++ exited with 42 +++
[pid 16569] <... waitid resumed>{si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=16571, si_uid=0, si_status=42, si_utime=3472328296226648184, si_stime=3475143045726351408}, WEXITED, NULL) = 0
[pid 16569] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=16571, si_uid=0, si_status=42, si_utime=0, si_stime=0} ---
[pid 16569] statx(1, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_BASIC_STATS, {stx_mask=STATX_BASIC_STATS|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFCHR|0600, stx_size=0, ...}) = 0
[pid 16569] write(1, "child wants to return 42 (0x2A),"..., 74child wants to return 42 (0x2A), parent received 40 (0x28), difference -2
) = 74
[pid 16569] brk(0x13c1000)              = 0x13c1000
[pid 16569] brk(0x13c0000)              = 0x13c0000
[pid 16569] exit_group(0)               = ?
[pid 16570] <... futex resumed>)        = ?
[pid 16570] +++ exited with 0 +++
+++ exited with 0 +++


Here's qemu's own strace log:

farino ~ # /usr/bin/qemu-riscv32 -strace /chroot/riscv-ilp32/tmp/wait-test-short
10123 brk(NULL) = 0x00073000
10123 brk(0x00073880) = 0x00073880
10123 uname(0x407ffed8) = 0
10123 readlinkat(AT_FDCWD,"/proc/self/exe",0x407feff0,4096) = 39
10123 brk(0x00094880) = 0x00094880
10123 brk(0x00095000) = 0x00095000
10123 mprotect(0x0006e000,8192,PROT_READ) = 0
10123 clone(CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|0x11,child_stack=0x00000000,parent_tidptr=0x00000000,tls=0x00000000,child_tidptr=0x00073068) = 10125
10123 clone(CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|0x11,child_stack=0x00000000,parent_tidptr=0x00000000,tls=0x00000000,child_tidptr=0x00073068) = 0
10125 exit_group(42)
10123 waitid(0,-1,0x407fff8c,0x4) = 0
10123 statx(1,"",AT_EMPTY_PATH,STATX_BASIC_STATS,0x407ff8e8) = 0
child wants to return 42 (0x2A), parent received 40 (0x28), difference -2
10123 write(1,0x73ad0,74) = 74
10123 exit_group(0)


I have sent a patch, you can see it here: https://patchwork.ozlabs.org/project/qemu-devel/list/?series=221381

It seems like QEMU's waitid implementation has a bug with handling the status.

Thanks a lot! Will test and post the result on monday when I'm back home.

After applying this patch on top of qemu-5.2.0, I can confirm that it fixes the problem.

Thank you!!

Just as a general remark, while this specific problem seems to be solved, there may still be issues surrounding waitid().

(With this patch applied, in a rather complex environment I see bash processes hanging in an infinite loop, with waitid involved. I am working on isolating the problem and providing a simple test case, but so far I have not even found the code triggering it.)

Can you add a Tested-by: tag to the patch?

Done (took a while to figure out how...)

A fix has been merged into master.