instruction: 0.954 other: 0.952 device: 0.951 semantic: 0.936 graphic: 0.925 socket: 0.915 assembly: 0.912 boot: 0.903 mistranslation: 0.901 network: 0.891 vnc: 0.890 KVM: 0.890 riscv32 user mode emulation: fork return values broken When running in a chroot with riscv32 (on x86_64; qemu git master as of today): The following short program forks; the child immediately returns with exit(42). The parent checks for the return value - and obtains 40! gcc-10.2 =============================================== #include <stdlib.h> #include <stdio.h> #include <sys/types.h> #include <sys/wait.h> main(c, v) int c; char **v; { pid_t pid, p; int s, i, n; s = 0; pid = fork(); if (pid == 0) exit(42); /* wait for the process */ p = wait(&s); if (p != pid) exit (255); if (WIFEXITED(s)) { int r=WEXITSTATUS(s); if (r!=42) { printf("child wants to return %i (0x%X), parent received %i (0x%X), difference %i\n",42,42,r,r,r-42); } } } =============================================== (riscv-ilp32 chroot) farino /tmp # ./wait-test-short child wants to return 42 (0x2A), parent received 40 (0x28), difference -2 =============================================== (riscv-ilp32 chroot) farino /tmp # gcc --version gcc (Gentoo 10.2.0-r1 p2) 10.2.0 Copyright (C) 2020 Free Software Foundation, Inc. Dies ist freie Software; die Kopierbedingungen stehen in den Quellen. Es gibt KEINE Garantie; auch nicht für MARKTGÄNGIGKEIT oder FÜR SPEZIELLE ZWECKE. (riscv-ilp32 chroot) farino /tmp # ld --version GNU ld (Gentoo 2.34 p6) 2.34.0 Copyright (C) 2020 Free Software Foundation, Inc. This program is free software; you may redistribute it under the terms of the GNU General Public License version 3 or (at your option) a later version. This program has absolutely no warranty. This is the (statically linked) binary resulting from the source; with it the problem can be demonstrated "standalone", without any other rv32 libraries or a complete chroot, just running the binary with qemu-riscv32. 
Generated with (riscv-ilp32 chroot) farino /tmp # gcc -static -o wait-test-short -g wait-test-short.c I can confirm that the same binary works fine with qemu system emulation: (riscv-ilp32 qemu) (none) /tmp # ./wait-test-short (riscv-ilp32 qemu) (none) /tmp # Here's the (abbreviated) output of strace'ing qemu: farino ~ # strace -f /usr/bin/qemu-riscv32 /chroot/riscv-ilp32/tmp/wait-test-short execve("/usr/bin/qemu-riscv32", ["/usr/bin/qemu-riscv32", "/chroot/riscv-ilp32/tmp/wait-tes"...], 0x7ffd95fb1330 /* 40 vars */) = 0 [...] [pid 16569] uname({sysname="Linux", nodename="farino", ...}) = 0 [pid 16569] lstat("/chroot", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 [pid 16569] lstat("/chroot/riscv-ilp32", {st_mode=S_IFDIR|S_ISGID|0755, st_size=4096, ...}) = 0 [pid 16569] lstat("/chroot/riscv-ilp32/tmp", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0 [pid 16569] lstat("/chroot/riscv-ilp32/tmp/wait-test-short", {st_mode=S_IFREG|0755, st_size=445632, ...}) = 0 [pid 16569] mmap(0x413f1000, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x413f1000 [pid 16569] mprotect(0x413eb000, 8192, PROT_READ) = 0 [pid 16569] rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0 [pid 16569] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x1339710) = 16571 strace: Process 16571 attached [pid 16571] set_robust_list(0x1339720, 24 [pid 16569] rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0 [pid 16571] <... 
set_robust_list resumed>) = 0 [pid 16569] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 [pid 16571] rt_sigprocmask(SIG_SETMASK, ~[ILL FPE SEGV RTMIN RT_1], ~[KILL STOP RTMIN RT_1], 8) = 0 [pid 16571] rt_sigprocmask(SIG_BLOCK, ~[], ~[ILL FPE KILL SEGV STOP RTMIN RT_1], 8) = 0 [pid 16571] clone(child_stack=0x7fe5b73871f0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[16572], tls=0x7fe5b7387640, child_tidptr=0x7fe5b7387910) = 16572 [pid 16571] rt_sigprocmask(SIG_SETMASK, ~[ILL FPE KILL SEGV STOP RTMIN RT_1], NULL, 8) = 0 [pid 16571] rt_sigprocmask(SIG_SETMASK, ~[KILL STOP RTMIN RT_1], NULL, 8) = 0 [pid 16571] gettid() = 16571 [pid 16571] rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], NULL, 8) = 0 [pid 16571] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 [pid 16569] waitid(P_ALL, -1, [pid 16571] exit_group(42) = ? strace: Process 16572 attached [pid 16572] +++ exited with 42 +++ [pid 16571] +++ exited with 42 +++ [pid 16569] <... waitid resumed>{si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=16571, si_uid=0, si_status=42, si_utime=3472328296226648184, si_stime=3475143045726351408}, WEXITED, NULL) = 0 [pid 16569] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=16571, si_uid=0, si_status=42, si_utime=0, si_stime=0} --- [pid 16569] statx(1, "", AT_STATX_SYNC_AS_STAT|AT_EMPTY_PATH, STATX_BASIC_STATS, {stx_mask=STATX_BASIC_STATS|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFCHR|0600, stx_size=0, ...}) = 0 [pid 16569] write(1, "child wants to return 42 (0x2A),"..., 74child wants to return 42 (0x2A), parent received 40 (0x28), difference -2 ) = 74 [pid 16569] brk(0x13c1000) = 0x13c1000 [pid 16569] brk(0x13c0000) = 0x13c0000 [pid 16569] exit_group(0) = ? [pid 16570] <... futex resumed>) = ? 
[pid 16570] +++ exited with 0 +++ +++ exited with 0 +++ Here's qemu's own strace log: farino ~ # /usr/bin/qemu-riscv32 -strace /chroot/riscv-ilp32/tmp/wait-test-short 10123 brk(NULL) = 0x00073000 10123 brk(0x00073880) = 0x00073880 10123 uname(0x407ffed8) = 0 10123 readlinkat(AT_FDCWD,"/proc/self/exe",0x407feff0,4096) = 39 10123 brk(0x00094880) = 0x00094880 10123 brk(0x00095000) = 0x00095000 10123 mprotect(0x0006e000,8192,PROT_READ) = 0 10123 clone(CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|0x11,child_stack=0x00000000,parent_tidptr=0x00000000,tls=0x00000000,child_tidptr=0x00073068) = 10125 10123 clone(CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|0x11,child_stack=0x00000000,parent_tidptr=0x00000000,tls=0x00000000,child_tidptr=0x00073068) = 0 10125 exit_group(42) 10123 waitid(0,-1,0x407fff8c,0x4) = 0 10123 statx(1,"",AT_EMPTY_PATH,STATX_BASIC_STATS,0x407ff8e8) = 0 child wants to return 42 (0x2A), parent received 40 (0x28), difference -2 10123 write(1,0x73ad0,74) = 74 10123 exit_group(0) I have sent a patch, you can see it here: https://patchwork.ozlabs.org/project/qemu-devel/list/?series=221381 It seems like QEMU's waitid implementation has a bug with handling the status. Thanks a lot! Will test and post the result on Monday when I'm back home. After applying this patch on top of qemu-5.2.0, I can confirm that it fixes the problem. Thank you!! Just as a general remark, while this specific problem seems to be solved, there may still be issues surrounding waitid(). (With this patch applied, in a rather complex environment I see bash processes hanging in an infinite loop, with waitid involved. I am working on isolating the problem and providing a simple test case, but so far I have not even found the code triggering it.) Can you add a Tested-by: tag to the patch? Done (took a while to figure out how...) A fix has been merged into master.