From 2466119c9551d606a0f92f9832e0c865bc04b488 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:25 +0100 Subject: linux-user: Check array bounds in errno conversion Check array bounds in host_to_target_errno() and target_to_host_errno(). Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-2-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: Add a lower-bound check, use braces on if(), tweak commit message] Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio Reviewed-by: Laurent Vivier --- linux-user/syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 032d338869..5246f360df 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -619,15 +619,19 @@ static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = { static inline int host_to_target_errno(int err) { - if(host_to_target_errno_table[err]) + if (err >= 0 && err < ERRNO_TABLE_SIZE && + host_to_target_errno_table[err]) { return host_to_target_errno_table[err]; + } return err; } static inline int target_to_host_errno(int err) { - if (target_to_host_errno_table[err]) + if (err >= 0 && err < ERRNO_TABLE_SIZE && + target_to_host_errno_table[err]) { return target_to_host_errno_table[err]; + } return err; } -- cgit 1.4.1 From a3ca7bb2592010eedca31abd11d3ab451cf3b738 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 12 May 2016 18:47:26 +0100 Subject: linux-user: Consistently return host errnos from do_openat() The function do_openat() is not consistent about whether it is returning a host errno or a guest errno in case of failure. Standardise on returning -1 with errno set (ie caller has to call get_errno()). Signed-off-by: Peter Maydell Reported-by: Timothy Edward Baldwin Signed-off-by: Riku Voipio Reviewed-by: Laurent Vivier --- linux-user/syscall.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 5246f360df..f4c2e190f8 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5559,7 +5559,9 @@ static int open_self_cmdline(void *cpu_env, int fd) nb_read = read(fd_orig, buf, sizeof(buf)); if (nb_read < 0) { + int e = errno; fd_orig = close(fd_orig); + errno = e; return -1; } else if (nb_read == 0) { break; @@ -5579,7 +5581,9 @@ static int open_self_cmdline(void *cpu_env, int fd) if (word_skipped) { if (write(fd, cp_buf, nb_read) != nb_read) { + int e = errno; close(fd_orig); + errno = e; return -1; } } @@ -5599,7 +5603,7 @@ static int open_self_maps(void *cpu_env, int fd) fp = fopen("/proc/self/maps", "r"); if (fp == NULL) { - return -EACCES; + return -1; } while ((read = getline(&line, &len, fp)) != -1) { @@ -5743,7 +5747,7 @@ static int open_net_route(void *cpu_env, int fd) fp = fopen("/proc/net/route", "r"); if (fp == NULL) { - return -EACCES; + return -1; } /* read header */ @@ -5793,7 +5797,7 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, if (is_proc_myself(pathname, "exe")) { int execfd = qemu_getauxval(AT_EXECFD); - return execfd ? execfd : get_errno(sys_openat(dirfd, exec_path, flags, mode)); + return execfd ? execfd : sys_openat(dirfd, exec_path, flags, mode); } for (fake_open = fakes; fake_open->filename; fake_open++) { @@ -5819,7 +5823,9 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, unlink(filename); if ((r = fake_open->fill(cpu_env, fd))) { + int e = errno; close(fd); + errno = e; return r; } lseek(fd, 0, SEEK_SET); @@ -5827,7 +5833,7 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, return fd; } - return get_errno(sys_openat(dirfd, path(pathname), flags, mode)); + return sys_openat(dirfd, path(pathname), flags, mode); } #define TIMER_MAGIC 0x0caf0000 -- cgit 1.4.1 From 0284b03ba3f47da53b6b46293a3d586c08829f7e Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:30 +0100 Subject: linux-user: Support for restarting system calls for x86 targets Update the x86 main loop and sigreturn code: * on TARGET_ERESTARTSYS, wind guest PC backwards to repeat syscall insn * set all guest CPU state within signal.c code rather than passing it back out as the "return code" from do_sigreturn() * handle TARGET_QEMU_ESIGRETURN in the main loop as the indication that the main loop should not touch EAX Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-5-git-send-email-T.E.Baldwin99@members.leeds.ac.uk Reviewed-by: Peter Maydell [PMM: Commit message tweaks; drop TARGET_USE_ERESTARTSYS define] Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/main.c | 47 +++++++++++++++++++++++++++++------------------ linux-user/signal.c | 15 +++++++-------- linux-user/syscall.c | 2 -- 3 files changed, 36 insertions(+), 28 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/main.c b/linux-user/main.c index 95ed11d85c..da5a0333b6 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -285,6 +285,7 @@ void cpu_loop(CPUX86State *env) CPUState *cs = CPU(x86_env_get_cpu(env)); int trapnr; abi_ulong pc; + abi_ulong ret; target_siginfo_t info; for(;;) { @@ -294,28 +295,38 @@ void cpu_loop(CPUX86State *env) switch(trapnr) { case 0x80: /* linux syscall from int $0x80 */ - env->regs[R_EAX] = do_syscall(env, - env->regs[R_EAX], - env->regs[R_EBX], - env->regs[R_ECX], - env->regs[R_EDX], - env->regs[R_ESI], - env->regs[R_EDI], - env->regs[R_EBP], - 0, 0); + ret = do_syscall(env, + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP], + 0, 0); + if (ret == -TARGET_ERESTARTSYS) { + env->eip -= 2; + } else if (ret != -TARGET_QEMU_ESIGRETURN) { + env->regs[R_EAX] = ret; + } break; #ifndef TARGET_ABI32 case EXCP_SYSCALL: /* linux syscall from syscall instruction */ - env->regs[R_EAX] = do_syscall(env, - env->regs[R_EAX], - env->regs[R_EDI], - env->regs[R_ESI], - env->regs[R_EDX], - env->regs[10], - env->regs[8], - env->regs[9], - 0, 0); + ret = do_syscall(env, + env->regs[R_EAX], + env->regs[R_EDI], + env->regs[R_ESI], + env->regs[R_EDX], + env->regs[10], + env->regs[8], + env->regs[9], + 0, 0); + if (ret == -TARGET_ERESTARTSYS) { + env->eip -= 2; + } else if (ret != -TARGET_QEMU_ESIGRETURN) { + env->regs[R_EAX] = ret; + } break; #endif case EXCP0B_NOSEG: diff --git a/linux-user/signal.c b/linux-user/signal.c index 04c21d0ccd..11ddd05173 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -1024,7 +1024,7 @@ give_sigsegv: } static int -restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax) +restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) { unsigned int err = 0; abi_ulong fpstate_addr; @@ -1042,6 +1042,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax) env->regs[R_EBX] = tswapl(sc->ebx); env->regs[R_EDX] = tswapl(sc->edx); env->regs[R_ECX] = tswapl(sc->ecx); + env->regs[R_EAX] = tswapl(sc->eax); env->eip = tswapl(sc->eip); cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3); @@ -1059,7 +1060,6 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax) cpu_x86_frstor(env, fpstate_addr, 1); } - *peax = tswapl(sc->eax); return err; badframe: return 1; @@ -1071,7 +1071,7 @@ long do_sigreturn(CPUX86State *env) abi_ulong frame_addr = env->regs[R_ESP] - 8; target_sigset_t target_set; sigset_t set; - int eax, i; + int i; trace_user_do_sigreturn(env, frame_addr); if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) @@ -1086,10 +1086,10 @@ long do_sigreturn(CPUX86State *env) do_sigprocmask(SIG_SETMASK, &set, NULL); /* restore registers */ - if (restore_sigcontext(env, &frame->sc, &eax)) + if (restore_sigcontext(env, &frame->sc)) goto badframe; unlock_user_struct(frame, frame_addr, 0); - return eax; + return -TARGET_QEMU_ESIGRETURN; badframe: unlock_user_struct(frame, frame_addr, 0); @@ -1102,7 +1102,6 @@ long do_rt_sigreturn(CPUX86State *env) abi_ulong frame_addr; struct rt_sigframe *frame; sigset_t set; - int eax; frame_addr = env->regs[R_ESP] - 4; trace_user_do_rt_sigreturn(env, frame_addr); @@ -1111,7 +1110,7 @@ long do_rt_sigreturn(CPUX86State *env) target_to_host_sigset(&set, &frame->uc.tuc_sigmask); do_sigprocmask(SIG_SETMASK, &set, NULL); - if (restore_sigcontext(env, &frame->uc.tuc_mcontext, &eax)) { + if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; } @@ -1121,7 +1120,7 @@ long do_rt_sigreturn(CPUX86State *env) } unlock_user_struct(frame, frame_addr, 0); - return eax; + return -TARGET_QEMU_ESIGRETURN; badframe: unlock_user_struct(frame, frame_addr, 0); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f4c2e190f8..a4a1af75f6 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6940,12 +6940,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_sigreturn case TARGET_NR_sigreturn: - /* NOTE: ret is eax, so not transcoding must be done */ ret = do_sigreturn(cpu_env); break; #endif case TARGET_NR_rt_sigreturn: - /* NOTE: ret is eax, so not transcoding must be done */ ret = do_rt_sigreturn(cpu_env); break; case TARGET_NR_sethostname: -- cgit 1.4.1 From 71a8f7fece3e42dc55e865e081866f62f5c8c07e Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:45 +0100 Subject: linux-user: Add debug code to exercise restarting system calls If DEBUG_ERESTARTSYS is set restart all system calls once. This is pure debug code for exercising the syscall restart code paths in the per-architecture cpu main loops. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-10-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: Add comment and a commented-out #define next to the commented-out generic DEBUG #define; remove the check on TARGET_USE_ERESTARTSYS; tweak comment message] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a4a1af75f6..ced519d692 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -110,6 +110,10 @@ int __clone2(int (*fn)(void *), void *child_stack_base, CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) //#define DEBUG +/* Define DEBUG_ERESTARTSYS to force every syscall to be restarted + * once. This exercises the codepaths for restart. + */ +//#define DEBUG_ERESTARTSYS //#include #define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct linux_dirent [2]) @@ -5871,6 +5875,21 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, struct statfs stfs; void *p; +#if defined(DEBUG_ERESTARTSYS) + /* Debug-only code for exercising the syscall-restart code paths + * in the per-architecture cpu main loops: restart every syscall + * the guest makes once before letting it through. + */ + { + static int flag; + + flag = !flag; + if (flag) { + return -TARGET_ERESTARTSYS; + } + } +#endif + #ifdef DEBUG gemu_log("syscall %d", num); #endif -- cgit 1.4.1 From 4d330cee37a21aabfc619a1948953559e66951a4 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:46 +0100 Subject: linux-user: Provide safe_syscall for fixing races between signals and syscalls If a signal is delivered immediately before a blocking system call the handler will only be called after the system call returns, which may be a long time later or never. This is fixed by using a function (safe_syscall) that checks if a guest signal is pending prior to making a system call, and if so does not call the system call and returns -TARGET_ERESTARTSYS. If a signal is received between the check and the system call host_signal_handler() rewinds execution to before the check. This rewinding has the effect of closing the race window so that safe_syscall will reliably either (a) go into the host syscall with no unprocessed guest signals pending or or (b) return -TARGET_ERESTARTSYS so that the caller can deal with the signals. Implementing this requires a per-host-architecture assembly language fragment. This will also resolve the mishandling of the SA_RESTART flag where we would restart a host system call and not call the guest signal handler until the syscall finally completed -- syscall restarting now always happens at the guest syscall level so the guest signal handler will run. (The host syscall will never be restarted because if the host kernel rewinds the PC to point at the syscall insn for a restart then our host_signal_handler() will see this and arrange the guest PC rewind.) This commit contains the infrastructure for implementing safe_syscall and the assembly language fragment for x86-64, but does not change any syscalls to use it. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-14-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: * Avoid having an architecture if-ladder in configure by putting linux-user/host/$(ARCH) on the include path and including safe-syscall.inc.S from it * Avoid ifdef ladder in signal.c by creating new hostdep.h to hold host-architecture-specific things * Added copyright/license header to safe-syscall.inc.S * Rewrote commit message * Added comments to safe-syscall.inc.S * Changed calling convention of safe_syscall() to match syscall() (returns -1 and host error in errno on failure) * Added a long comment in qemu.h about how to use safe_syscall() to implement guest syscalls. ] RV: squashed Peters "fixup! linux-user: compile on non-x86-64 hosts" patch Signed-off-by: Peter Maydell --- Makefile.target | 7 +- linux-user/Makefile.objs | 3 +- linux-user/host/generic/hostdep.h | 20 +++++ linux-user/host/x86_64/hostdep.h | 38 +++++++++ linux-user/host/x86_64/safe-syscall.inc.S | 81 +++++++++++++++++++ linux-user/qemu.h | 127 +++++++++++++++++++++++++++++- linux-user/safe-syscall.S | 30 +++++++ linux-user/signal.c | 10 +++ linux-user/syscall.c | 47 +++++++++++ 9 files changed, 360 insertions(+), 3 deletions(-) create mode 100644 linux-user/host/generic/hostdep.h create mode 100644 linux-user/host/x86_64/hostdep.h create mode 100644 linux-user/host/x86_64/safe-syscall.inc.S create mode 100644 linux-user/safe-syscall.S (limited to 'linux-user/syscall.c') diff --git a/Makefile.target b/Makefile.target index 34ddb7e762..5b80dd7fc9 100644 --- a/Makefile.target +++ b/Makefile.target @@ -108,7 +108,12 @@ obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/dpd/decimal128.o ifdef CONFIG_LINUX_USER -QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user +# Note that we only add linux-user/host/$ARCH if it exists, and +# that it must come before linux-user/host/generic in the search path. +QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \ + $(patsubst %,-I%,$(wildcard $(SRC_PATH)/linux-user/host/$(ARCH))) \ + -I$(SRC_PATH)/linux-user/host/generic \ + -I$(SRC_PATH)/linux-user obj-y += linux-user/ obj-y += gdbstub.o thunk.o user-exec.o diff --git a/linux-user/Makefile.objs b/linux-user/Makefile.objs index fd5021788f..8c93058100 100644 --- a/linux-user/Makefile.objs +++ b/linux-user/Makefile.objs @@ -1,5 +1,6 @@ obj-y = main.o syscall.o strace.o mmap.o signal.o \ - elfload.o linuxload.o uaccess.o uname.o + elfload.o linuxload.o uaccess.o uname.o \ + safe-syscall.o obj-$(TARGET_HAS_BFLT) += flatload.o obj-$(TARGET_I386) += vm86.o diff --git a/linux-user/host/generic/hostdep.h b/linux-user/host/generic/hostdep.h new file mode 100644 index 0000000000..cfabc3590b --- /dev/null +++ b/linux-user/host/generic/hostdep.h @@ -0,0 +1,20 @@ +/* + * hostdep.h : fallback generic version of header for things + * which are dependent on the host architecture + * + * * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_HOSTDEP_H +#define QEMU_HOSTDEP_H + +/* This is the fallback header which is only used if the host + * architecture doesn't provide one in linux-user/host/$ARCH. + */ + +#endif diff --git a/linux-user/host/x86_64/hostdep.h b/linux-user/host/x86_64/hostdep.h new file mode 100644 index 0000000000..9dfbf3ae6a --- /dev/null +++ b/linux-user/host/x86_64/hostdep.h @@ -0,0 +1,38 @@ +/* + * hostdep.h : things which are dependent on the host architecture + * + * * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_HOSTDEP_H +#define QEMU_HOSTDEP_H + +/* We have a safe-syscall.inc.S */ +#define HAVE_SAFE_SYSCALL + +#ifndef __ASSEMBLER__ + +/* These are defined by the safe-syscall.inc.S file */ +extern char safe_syscall_start[]; +extern char safe_syscall_end[]; + +/* Adjust the signal context to rewind out of safe-syscall if we're in it */ +static inline void rewind_if_in_safe_syscall(void *puc) +{ + struct ucontext *uc = puc; + greg_t *pcreg = &uc->uc_mcontext.gregs[REG_RIP]; + + if (*pcreg > (uintptr_t)safe_syscall_start + && *pcreg < (uintptr_t)safe_syscall_end) { + *pcreg = (uintptr_t)safe_syscall_start; + } +} + +#endif /* __ASSEMBLER__ */ + +#endif diff --git a/linux-user/host/x86_64/safe-syscall.inc.S b/linux-user/host/x86_64/safe-syscall.inc.S new file mode 100644 index 0000000000..dde434c8d7 --- /dev/null +++ b/linux-user/host/x86_64/safe-syscall.inc.S @@ -0,0 +1,81 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by linux-user/safe-syscall.S + * + * Copyright (C) 2015 Timothy Edward Baldwin + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + * We return a long which is the syscall's return value, which + * may be negative-errno on failure. Conversion to the + * -1-and-errno-set convention is done by the calling wrapper. + */ +safe_syscall_base: + /* This saves a frame pointer and aligns the stack for the syscall. + * (It's unclear if the syscall ABI has the same stack alignment + * requirements as the userspace function call ABI, but better safe than + * sorry. Appendix A2 of http://www.x86-64.org/documentation/abi.pdf + * does not list any ABI differences regarding stack alignment.) + */ + push %rbp + + /* The syscall calling convention isn't the same as the + * C one: + * we enter with rdi == *signal_pending + * rsi == syscall number + * rdx, rcx, r8, r9, (stack), (stack) == syscall arguments + * and return the result in rax + * and the syscall instruction needs + * rax == syscall number + * rdi, rsi, rdx, r10, r8, r9 == syscall arguments + * and returns the result in rax + * Shuffle everything around appropriately. + * Note that syscall will trash rcx and r11. + */ + mov %rsi, %rax /* syscall number */ + mov %rdi, %rbp /* signal_pending pointer */ + /* and the syscall arguments */ + mov %rdx, %rdi + mov %rcx, %rsi + mov %r8, %rdx + mov %r9, %r10 + mov 16(%rsp), %r8 + mov 24(%rsp), %r9 + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + testl $1, (%rbp) + jnz return_ERESTARTSYS + syscall +safe_syscall_end: + /* code path for having successfully executed the syscall */ + pop %rbp + ret + +return_ERESTARTSYS: + /* code path when we didn't execute the syscall */ + mov $-TARGET_ERESTARTSYS, %rax + pop %rbp + ret + + .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 208c63eb2a..f09b750bbf 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -1,7 +1,7 @@ #ifndef QEMU_H #define QEMU_H - +#include "hostdep.h" #include "cpu.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" @@ -205,6 +205,131 @@ unsigned long init_guest_space(unsigned long host_start, #include "qemu/log.h" +/* safe_syscall.S */ + +/** + * safe_syscall: + * @int number: number of system call to make + * ...: arguments to the system call + * + * Call a system call if guest signal not pending. + * This has the same API as the libc syscall() function, except that it + * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending. + * + * Returns: the system call result, or -1 with an error code in errno + * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing + * with any of the host errno values.) + */ + +/* A guide to using safe_syscall() to handle interactions between guest + * syscalls and guest signals: + * + * Guest syscalls come in two flavours: + * + * (1) Non-interruptible syscalls + * + * These are guest syscalls that never get interrupted by signals and + * so never return EINTR. They can be implemented straightforwardly in + * QEMU: just make sure that if the implementation code has to make any + * blocking calls that those calls are retried if they return EINTR. + * It's also OK to implement these with safe_syscall, though it will be + * a little less efficient if a signal is delivered at the 'wrong' moment. + * + * (2) Interruptible syscalls + * + * These are guest syscalls that can be interrupted by signals and + * for which we need to either return EINTR or arrange for the guest + * syscall to be restarted. This category includes both syscalls which + * always restart (and in the kernel return -ERESTARTNOINTR), ones + * which only restart if there is no handler (kernel returns -ERESTARTNOHAND + * or -ERESTART_RESTARTBLOCK), and the most common kind which restart + * if the handler was registered with SA_RESTART (kernel returns + * -ERESTARTSYS). System calls which are only interruptible in some + * situations (like 'open') also need to be handled this way. + * + * Here it is important that the host syscall is made + * via this safe_syscall() function, and *not* via the host libc. + * If the host libc is used then the implementation will appear to work + * most of the time, but there will be a race condition where a + * signal could arrive just before we make the host syscall inside libc, + * and then then guest syscall will not correctly be interrupted. + * Instead the implementation of the guest syscall can use the safe_syscall + * function but otherwise just return the result or errno in the usual + * way; the main loop code will take care of restarting the syscall + * if appropriate. + * + * (If the implementation needs to make multiple host syscalls this is + * OK; any which might really block must be via safe_syscall(); for those + * which are only technically blocking (ie which we know in practice won't + * stay in the host kernel indefinitely) it's OK to use libc if necessary. + * You must be able to cope with backing out correctly if some safe_syscall + * you make in the implementation returns either -TARGET_ERESTARTSYS or + * EINTR though.) + * + * + * How and why the safe_syscall implementation works: + * + * The basic setup is that we make the host syscall via a known + * section of host native assembly. If a signal occurs, our signal + * handler checks the interrupted host PC against the addresse of that + * known section. If the PC is before or at the address of the syscall + * instruction then we change the PC to point at a "return + * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler + * (causing the safe_syscall() call to immediately return that value). + * Then in the main.c loop if we see this magic return value we adjust + * the guest PC to wind it back to before the system call, and invoke + * the guest signal handler as usual. + * + * This winding-back will happen in two cases: + * (1) signal came in just before we took the host syscall (a race); + * in this case we'll take the guest signal and have another go + * at the syscall afterwards, and this is indistinguishable for the + * guest from the timing having been different such that the guest + * signal really did win the race + * (2) signal came in while the host syscall was blocking, and the + * host kernel decided the syscall should be restarted; + * in this case we want to restart the guest syscall also, and so + * rewinding is the right thing. (Note that "restart" semantics mean + * "first call the signal handler, then reattempt the syscall".) + * The other situation to consider is when a signal came in while the + * host syscall was blocking, and the host kernel decided that the syscall + * should not be restarted; in this case QEMU's host signal handler will + * be invoked with the PC pointing just after the syscall instruction, + * with registers indicating an EINTR return; the special code in the + * handler will not kick in, and we will return EINTR to the guest as + * we should. + * + * Notice that we can leave the host kernel to make the decision for + * us about whether to do a restart of the syscall or not; we do not + * need to check SA_RESTART flags in QEMU or distinguish the various + * kinds of restartability. + */ +#ifdef HAVE_SAFE_SYSCALL +/* The core part of this function is implemented in assembly */ +extern long safe_syscall_base(int *pending, long number, ...); + +#define safe_syscall(...) \ + ({ \ + long ret_; \ + int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \ + ret_ = safe_syscall_base(psp_, __VA_ARGS__); \ + if (is_error(ret_)) { \ + errno = -ret_; \ + ret_ = -1; \ + } \ + ret_; \ + }) + +#else + +/* Fallback for architectures which don't yet provide a safe-syscall assembly + * fragment; note that this is racy! + * This should go away when all host architectures have been updated. + */ +#define safe_syscall syscall + +#endif + /* syscall.c */ int host_to_target_waitstatus(int status); diff --git a/linux-user/safe-syscall.S b/linux-user/safe-syscall.S new file mode 100644 index 0000000000..b5df6254ae --- /dev/null +++ b/linux-user/safe-syscall.S @@ -0,0 +1,30 @@ +/* + * safe-syscall.S : include the host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * + * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hostdep.h" +#include "errno_defs.h" + +/* We have the correct host directory on our include path + * so that this will pull in the right fragment for the architecture. + */ +#ifdef HAVE_SAFE_SYSCALL +#include "safe-syscall.inc.S" +#endif + +/* We must specifically say that we're happy for the stack to not be + * executable, otherwise the toolchain will default to assuming our + * assembly needs an executable stack and the whole QEMU binary will + * needlessly end up with one. This should be the last thing in this file. + */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/linux-user/signal.c b/linux-user/signal.c index 9e8555096a..ff4de4fb8f 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -561,6 +561,13 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) } } +#ifndef HAVE_SAFE_SYSCALL +static inline void rewind_if_in_safe_syscall(void *puc) +{ + /* Default version: never rewind */ +} +#endif + static void host_signal_handler(int host_signum, siginfo_t *info, void *puc) { @@ -581,6 +588,9 @@ static void host_signal_handler(int host_signum, siginfo_t *info, if (sig < 1 || sig > TARGET_NSIG) return; trace_user_host_signal(env, host_signum, sig); + + rewind_if_in_safe_syscall(puc); + host_to_target_siginfo_noswap(&tinfo, info); if (queue_signal(env, sig, &tinfo) == 1) { /* interrupt the virtual CPU as soon as possible */ diff --git a/linux-user/syscall.c b/linux-user/syscall.c index ced519d692..b6a8ed6c7a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -660,6 +660,53 @@ char *target_strerror(int err) return strerror(target_to_host_errno(err)); } +#define safe_syscall0(type, name) \ +static type safe_##name(void) \ +{ \ + return safe_syscall(__NR_##name); \ +} + +#define safe_syscall1(type, name, type1, arg1) \ +static type safe_##name(type1 arg1) \ +{ \ + return safe_syscall(__NR_##name, arg1); \ +} + +#define safe_syscall2(type, name, type1, arg1, type2, arg2) \ +static type safe_##name(type1 arg1, type2 arg2) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2); \ +} + +#define safe_syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3); \ +} + +#define safe_syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4); \ +} + +#define safe_syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5); \ +} + +#define safe_syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ +static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) \ +{ \ + return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5, arg6); \ +} + static inline int host_to_target_sock_type(int host_type) { int target_type; -- cgit 1.4.1 From 50afd02b841cf0ccbc988a0fa868bbc4ca67c09e Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:47 +0100 Subject: linux-user: Use safe_syscall for read and write system calls Restart read() and write() if signals occur before, or during with SA_RESTART Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-15-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: Update to new safe_syscall() convention of setting errno] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b6a8ed6c7a..bee13607be 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -707,6 +707,9 @@ static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ return safe_syscall(__NR_##name, arg1, arg2, arg3, arg4, arg5, arg6); \ } +safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) +safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) + static inline int host_to_target_sock_type(int host_type) { int target_type; @@ -5983,7 +5986,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, else { if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0))) goto efault; - ret = get_errno(read(arg1, p, arg3)); + ret = get_errno(safe_read(arg1, p, arg3)); if (ret >= 0 && fd_trans_host_to_target_data(arg1)) { ret = fd_trans_host_to_target_data(arg1)(p, ret); @@ -5994,7 +5997,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_write: if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1))) goto efault; - ret = get_errno(write(arg1, p, arg3)); + ret = get_errno(safe_write(arg1, p, arg3)); unlock_user(p, arg2, 0); break; #ifdef TARGET_NR_open -- cgit 1.4.1 From c10a07387b77b94d8f7233f3b5bb559211d4e49a Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:48 +0100 Subject: linux-user: Use safe_syscall for open and openat system calls Restart open() and openat() if signals occur before, or during with SA_RESTART. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-17-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: Adjusted to follow new -1-and-set-errno safe_syscall convention] Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index bee13607be..0037ee7dd7 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -359,18 +359,6 @@ static int sys_getcwd1(char *buf, size_t size) return strlen(buf)+1; } -static int sys_openat(int dirfd, const char *pathname, int flags, mode_t mode) -{ - /* - * open(2) has extra parameter 'mode' when called with - * flag O_CREAT. - */ - if ((flags & O_CREAT) != 0) { - return (openat(dirfd, pathname, flags, mode)); - } - return (openat(dirfd, pathname, flags)); -} - #ifdef TARGET_NR_utimensat #ifdef CONFIG_UTIMENSAT static int sys_utimensat(int dirfd, const char *pathname, @@ -709,6 +697,8 @@ static type safe_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) +safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ + int, flags, mode_t, mode) static inline int host_to_target_sock_type(int host_type) { @@ -5851,7 +5841,7 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, if (is_proc_myself(pathname, "exe")) { int execfd = qemu_getauxval(AT_EXECFD); - return execfd ? execfd : sys_openat(dirfd, exec_path, flags, mode); + return execfd ? execfd : safe_openat(dirfd, exec_path, flags, mode); } for (fake_open = fakes; fake_open->filename; fake_open++) { @@ -5887,7 +5877,7 @@ static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, return fd; } - return sys_openat(dirfd, path(pathname), flags, mode); + return safe_openat(dirfd, path(pathname), flags, mode); } #define TIMER_MAGIC 0x0caf0000 -- cgit 1.4.1 From 4af80a3783950380df85ecca78aea3e3bad2e846 Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:49 +0100 Subject: linux-user: Use safe_syscall for wait system calls Use safe_syscall for waitpid, waitid and wait4 syscalls. Note that this change allows us to implement support for waitid's fifth (rusage) argument in future; for the moment we ignore it as we have done up til now. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-18-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: Adjust to new safe_syscall convention. Add fifth waitid syscall argument (which isn't present in the libc interface but is in the syscall ABI)] Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 0037ee7dd7..d9f4695624 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -699,6 +699,10 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ int, flags, mode_t, mode) +safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ + struct rusage *, rusage) +safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ + int, options, struct rusage *, rusage) static inline int host_to_target_sock_type(int host_type) { @@ -6037,7 +6041,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_waitpid: { int status; - ret = get_errno(waitpid(arg1, &status, arg3)); + ret = get_errno(safe_wait4(arg1, &status, arg3, 0)); if (!is_error(ret) && arg2 && ret && put_user_s32(host_to_target_waitstatus(status), arg2)) goto efault; @@ -6049,7 +6053,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { siginfo_t info; info.si_pid = 0; - ret = get_errno(waitid(arg1, arg2, &info, arg4)); + ret = get_errno(safe_waitid(arg1, arg2, &info, arg4, NULL)); if (!is_error(ret) && arg3 && info.si_pid != 0) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_siginfo_t), 0))) goto efault; @@ -7761,7 +7765,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, rusage_ptr = &rusage; else rusage_ptr = NULL; - ret = get_errno(wait4(arg1, &status, arg3, rusage_ptr)); + ret = get_errno(safe_wait4(arg1, &status, arg3, rusage_ptr)); if (!is_error(ret)) { if (status_ptr && ret) { status = host_to_target_waitstatus(status); -- cgit 1.4.1 From ffdcbe223d23461669869e85786145cce65e1e8c Mon Sep 17 00:00:00 2001 From: Timothy E Baldwin Date: Thu, 12 May 2016 18:47:50 +0100 Subject: linux-user: Use safe_syscall for execve syscall Wrap execve() in the safe-syscall handling. Although execve() is not an interruptible syscall, it is a special case: if we allow a signal to happen before we make the host$ syscall then we will 'lose' it, because at the point of execve the process leaves QEMU's control. So we use the safe syscall wrapper to ensure that we either take the signal as a guest signal, or else it does not happen before the execve completes and makes it the other program's problem. The practical upshot is that without this SIGTERM could fail to terminate the process. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-25-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: expanded commit message to explain in more detail why this is needed, and add comment about it too] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index d9f4695624..dea827fe04 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -703,6 +703,7 @@ safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ struct rusage *, rusage) safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ int, options, struct rusage *, rusage) +safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp) static inline int host_to_target_sock_type(int host_type) { @@ -6179,7 +6180,17 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (!(p = lock_user_string(arg1))) goto execve_efault; - ret = get_errno(execve(p, argp, envp)); + /* Although execve() is not an interruptible syscall it is + * a special case where we must use the safe_syscall wrapper: + * if we allow a signal to happen before we make the host + * syscall then we will 'lose' it, because at the point of + * execve the process leaves QEMU's control. So we use the + * safe syscall wrapper to ensure that we either take the + * signal as a guest signal, or else it does not happen + * before the execve completes and makes it the other + * program's problem. + */ + ret = get_errno(safe_execve(p, argp, envp)); unlock_user(p, arg1, 0); goto execve_end; -- cgit 1.4.1 From 6df9d38d33be8397bda8838dea5a4a10d662749b Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 12 May 2016 18:47:51 +0100 Subject: linux-user: Use safe_syscall for pselect, select syscalls Use the safe_syscall wrapper for the pselect and select syscalls. Since not every architecture has the select syscall, we now have to implement select in terms of pselect, which means doing timeval<->timespec conversion. (Five years on from the initial patch that added pselect support to QEMU and a decade after pselect6 went into the kernel, it seems safe to not try to support hosts with header files which don't define __NR_pselect6.) Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index dea827fe04..c9c2ae9903 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -430,15 +430,6 @@ _syscall5(int, sys_ppoll, struct pollfd *, fds, nfds_t, nfds, size_t, sigsetsize) #endif -#if defined(TARGET_NR_pselect6) -#ifndef __NR_pselect6 -# define __NR_pselect6 -1 -#endif -#define __NR_sys_pselect6 __NR_pselect6 -_syscall6(int, sys_pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, - fd_set *, exceptfds, struct timespec *, timeout, void *, sig); -#endif - #if defined(TARGET_NR_prlimit64) #ifndef __NR_prlimit64 # define __NR_prlimit64 -1 @@ -704,6 +695,8 @@ safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ int, options, struct rusage *, rusage) safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp) +safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ + fd_set *, exceptfds, struct timespec *, timeout, void *, sig) static inline int host_to_target_sock_type(int host_type) { @@ -1115,7 +1108,8 @@ static abi_long do_select(int n, { fd_set rfds, wfds, efds; fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; - struct timeval tv, *tv_ptr; + struct timeval tv; + struct timespec ts, *ts_ptr; abi_long ret; ret = copy_from_user_fdset_ptr(&rfds, &rfds_ptr, rfd_addr, n); @@ -1134,12 +1128,15 @@ static abi_long do_select(int n, if (target_tv_addr) { if (copy_from_user_timeval(&tv, target_tv_addr)) return -TARGET_EFAULT; - tv_ptr = &tv; + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * 1000; + ts_ptr = &ts; } else { - tv_ptr = NULL; + ts_ptr = NULL; } - ret = get_errno(select(n, rfds_ptr, wfds_ptr, efds_ptr, tv_ptr)); + ret = get_errno(safe_pselect6(n, rfds_ptr, wfds_ptr, efds_ptr, + ts_ptr, NULL)); if (!is_error(ret)) { if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) @@ -1149,8 +1146,13 @@ static abi_long do_select(int n, if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) return -TARGET_EFAULT; - if (target_tv_addr && copy_to_user_timeval(target_tv_addr, &tv)) - return -TARGET_EFAULT; + if (target_tv_addr) { + tv.tv_sec = ts.tv_sec; + tv.tv_usec = ts.tv_nsec / 1000; + if (copy_to_user_timeval(target_tv_addr, &tv)) { + return -TARGET_EFAULT; + } + } } return ret; @@ -7206,8 +7208,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, sig_ptr = NULL; } - ret = get_errno(sys_pselect6(n, rfds_ptr, wfds_ptr, efds_ptr, - ts_ptr, sig_ptr)); + ret = get_errno(safe_pselect6(n, rfds_ptr, wfds_ptr, efds_ptr, + ts_ptr, sig_ptr)); if (!is_error(ret)) { if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) -- cgit 1.4.1 From d509eeb13c9c6fef4a29ca43c64f591d8c61d201 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 12 May 2016 18:47:52 +0100 Subject: linux-user: Use safe_syscall for futex syscall Use the safe_syscall wrapper for the futex syscall. In particular, this fixes hangs when using programs that link against the Boehm garbage collector, including the Mono runtime. (We don't change the sys_futex() call in the implementation of the exit syscall, because as the FIXME comment there notes that should be handled by disabling signals, since we can't easily back out if the futex were to return ERESTARTSYS.) Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index c9c2ae9903..4e419fb18a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -697,6 +697,8 @@ safe_syscall5(int, waitid, idtype_t, idtype, id_t, id, siginfo_t *, infop, \ safe_syscall3(int, execve, const char *, filename, char **, argv, char **, envp) safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ fd_set *, exceptfds, struct timespec *, timeout, void *, sig) +safe_syscall6(int,futex,int *,uaddr,int,op,int,val, \ + const struct timespec *,timeout,int *,uaddr2,int,val3) static inline int host_to_target_sock_type(int host_type) { @@ -5381,12 +5383,12 @@ static int do_futex(target_ulong uaddr, int op, int val, target_ulong timeout, } else { pts = NULL; } - return get_errno(sys_futex(g2h(uaddr), op, tswap32(val), + return get_errno(safe_futex(g2h(uaddr), op, tswap32(val), pts, NULL, val3)); case FUTEX_WAKE: - return get_errno(sys_futex(g2h(uaddr), op, val, NULL, NULL, 0)); + return get_errno(safe_futex(g2h(uaddr), op, val, NULL, NULL, 0)); case FUTEX_FD: - return get_errno(sys_futex(g2h(uaddr), op, val, NULL, NULL, 0)); + return get_errno(safe_futex(g2h(uaddr), op, val, NULL, NULL, 0)); case FUTEX_REQUEUE: case FUTEX_CMP_REQUEUE: case FUTEX_WAKE_OP: @@ -5396,11 +5398,11 @@ static int do_futex(target_ulong uaddr, int op, int val, target_ulong timeout, to satisfy the compiler. We do not need to tswap TIMEOUT since it's not compared to guest memory. */ pts = (struct timespec *)(uintptr_t) timeout; - return get_errno(sys_futex(g2h(uaddr), op, val, pts, - g2h(uaddr2), - (base_op == FUTEX_CMP_REQUEUE - ? tswap32(val3) - : val3))); + return get_errno(safe_futex(g2h(uaddr), op, val, pts, + g2h(uaddr2), + (base_op == FUTEX_CMP_REQUEUE + ? tswap32(val3) + : val3))); default: return -TARGET_ENOSYS; } -- cgit 1.4.1 From c7e35da348e2e4df072e6979c48fa5283e07d1db Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 19 May 2016 12:01:40 +0100 Subject: linux-user: Handle negative values in timespec conversion In a struct timespec, both fields are signed longs. Converting them from guest to host with code like host_ts->tv_sec = tswapal(target_ts->tv_sec); mishandles negative values if the guest has 32-bit longs and the host has 64-bit longs because tswapal()'s return type is abi_ulong: the assignment will zero-extend into the host long type rather than sign-extending it. Make the conversion routines use __get_user() and __set_user() instead: this automatically picks up the signedness of the field type and does the correct kind of sign or zero extension. It also handles the possibility that the target struct is not sufficiently aligned for the host's requirements. In particular, this fixes a hang when running the Linux Test Project mq_timedsend01 and mq_timedreceive01 tests: one of the test cases sets the timeout to -1 and expects an EINVAL failure, but we were setting a very long timeout instead. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 4e419fb18a..6c4f5c6907 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5194,8 +5194,8 @@ static inline abi_long target_to_host_timespec(struct timespec *host_ts, if (!lock_user_struct(VERIFY_READ, target_ts, target_addr, 1)) return -TARGET_EFAULT; - host_ts->tv_sec = tswapal(target_ts->tv_sec); - host_ts->tv_nsec = tswapal(target_ts->tv_nsec); + __get_user(host_ts->tv_sec, &target_ts->tv_sec); + __get_user(host_ts->tv_nsec, &target_ts->tv_nsec); unlock_user_struct(target_ts, target_addr, 0); return 0; } @@ -5207,8 +5207,8 @@ static inline abi_long host_to_target_timespec(abi_ulong target_addr, if (!lock_user_struct(VERIFY_WRITE, target_ts, target_addr, 0)) return -TARGET_EFAULT; - target_ts->tv_sec = tswapal(host_ts->tv_sec); - target_ts->tv_nsec = tswapal(host_ts->tv_nsec); + __put_user(host_ts->tv_sec, &target_ts->tv_sec); + __put_user(host_ts->tv_nsec, &target_ts->tv_nsec); unlock_user_struct(target_ts, target_addr, 1); return 0; } -- cgit 1.4.1 From 99874f65526ed7827202c6e17c62f30d47652bdd Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 20 May 2016 19:00:56 +0100 Subject: linux-user: Handle msgrcv error case correctly The msgrcv ABI is a bit odd -- the msgsz argument is a size_t, which is unsigned, but it must fail EINVAL if the value is negative when cast to a long. We were incorrectly passing the value through an "unsigned int", which meant that if the guest was 32-bit longs and the host was 64-bit longs an input of 0xffffffff (which should trigger EINVAL) would simply be passed to the host msgrcv() as 0xffffffff, where it does not cause the host kernel to reject it. Follow the same approach as do_msgsnd() in using a ssize_t and doing the check for negative values by hand, so we correctly fail in this corner case. This fixes the msgrcv03 Linux Test Project test case, which otherwise hangs. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 6c4f5c6907..cec5b80331 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -3152,7 +3152,7 @@ static inline abi_long do_msgsnd(int msqid, abi_long msgp, } static inline abi_long do_msgrcv(int msqid, abi_long msgp, - unsigned int msgsz, abi_long msgtyp, + ssize_t msgsz, abi_long msgtyp, int msgflg) { struct target_msgbuf *target_mb; @@ -3160,6 +3160,10 @@ static inline abi_long do_msgrcv(int msqid, abi_long msgp, struct msgbuf *host_mb; abi_long ret = 0; + if (msgsz < 0) { + return -TARGET_EINVAL; + } + if (!lock_user_struct(VERIFY_WRITE, target_mb, msgp, 0)) return -TARGET_EFAULT; -- cgit 1.4.1 From 415d847110e3f8cd176160b92a5fdc56d8a20792 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 20 May 2016 19:00:57 +0100 Subject: linux-user: Use g_try_malloc() in do_msgrcv() In do_msgrcv() we want to allocate a message buffer, whose size is passed to us by the guest. That means we could legitimately fail, so use g_try_malloc() and handle the error case, in the same way that do_msgsnd() does. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index cec5b80331..40e8742924 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -3167,7 +3167,11 @@ static inline abi_long do_msgrcv(int msqid, abi_long msgp, if (!lock_user_struct(VERIFY_WRITE, target_mb, msgp, 0)) return -TARGET_EFAULT; - host_mb = g_malloc(msgsz+sizeof(long)); + host_mb = g_try_malloc(msgsz + sizeof(long)); + if (!host_mb) { + ret = -TARGET_ENOMEM; + goto end; + } ret = get_errno(msgrcv(msqid, host_mb, msgsz, msgtyp, msgflg)); if (ret > 0) { -- cgit 1.4.1 From fd6f7798ac3066ad9e3956defd37521830197666 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 1 Mar 2016 16:33:02 +0000 Subject: linux-user: Use direct syscalls for setuid(), etc On Linux the setuid(), setgid(), etc system calls have different semantics from the libc functions. The libc functions follow POSIX and update the credentials for all threads in the process; the system calls update only the thread which makes the call. (This impedance mismatch is worked around in libc by signalling all threads to tell them to do a syscall, in a byzantine and fragile way; see http://ewontfix.com/17/.) Since in linux-user we are trying to emulate the system call semantics, we must implement all these syscalls to directly call the underlying host syscall, rather than calling the host libc function. Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/syscall.c | 58 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 12 deletions(-) (limited to 'linux-user/syscall.c') diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 40e8742924..df70255e5f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5099,6 +5099,40 @@ static inline int tswapid(int id) #endif /* USE_UID16 */ +/* We must do direct syscalls for setting UID/GID, because we want to + * implement the Linux system call semantics of "change only for this thread", + * not the libc/POSIX semantics of "change for all threads in process". + * (See http://ewontfix.com/17/ for more details.) + * We use the 32-bit version of the syscalls if present; if it is not + * then either the host architecture supports 32-bit UIDs natively with + * the standard syscall, or the 16-bit UID is the best we can do. + */ +#ifdef __NR_setuid32 +#define __NR_sys_setuid __NR_setuid32 +#else +#define __NR_sys_setuid __NR_setuid +#endif +#ifdef __NR_setgid32 +#define __NR_sys_setgid __NR_setgid32 +#else +#define __NR_sys_setgid __NR_setgid +#endif +#ifdef __NR_setresuid32 +#define __NR_sys_setresuid __NR_setresuid32 +#else +#define __NR_sys_setresuid __NR_setresuid +#endif +#ifdef __NR_setresgid32 +#define __NR_sys_setresgid __NR_setresgid32 +#else +#define __NR_sys_setresgid __NR_setresgid +#endif + +_syscall1(int, sys_setuid, uid_t, uid) +_syscall1(int, sys_setgid, gid_t, gid) +_syscall3(int, sys_setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) +_syscall3(int, sys_setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) + void syscall_init(void) { IOCTLEntry *ie; @@ -8834,9 +8868,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setresuid case TARGET_NR_setresuid: - ret = get_errno(setresuid(low2highuid(arg1), - low2highuid(arg2), - low2highuid(arg3))); + ret = get_errno(sys_setresuid(low2highuid(arg1), + low2highuid(arg2), + low2highuid(arg3))); break; #endif #ifdef TARGET_NR_getresuid @@ -8855,9 +8889,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_getresgid case TARGET_NR_setresgid: - ret = get_errno(setresgid(low2highgid(arg1), - low2highgid(arg2), - low2highgid(arg3))); + ret = get_errno(sys_setresgid(low2highgid(arg1), + low2highgid(arg2), + low2highgid(arg3))); break; #endif #ifdef TARGET_NR_getresgid @@ -8883,10 +8917,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #endif case TARGET_NR_setuid: - ret = get_errno(setuid(low2highuid(arg1))); + ret = get_errno(sys_setuid(low2highuid(arg1))); break; case TARGET_NR_setgid: - ret = get_errno(setgid(low2highgid(arg1))); + ret = get_errno(sys_setgid(low2highgid(arg1))); break; case TARGET_NR_setfsuid: ret = get_errno(setfsuid(arg1)); @@ -9168,7 +9202,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setresuid32 case TARGET_NR_setresuid32: - ret = get_errno(setresuid(arg1, arg2, arg3)); + ret = get_errno(sys_setresuid(arg1, arg2, arg3)); break; #endif #ifdef TARGET_NR_getresuid32 @@ -9187,7 +9221,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setresgid32 case TARGET_NR_setresgid32: - ret = get_errno(setresgid(arg1, arg2, arg3)); + ret = get_errno(sys_setresgid(arg1, arg2, arg3)); break; #endif #ifdef TARGET_NR_getresgid32 @@ -9214,12 +9248,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_setuid32 case TARGET_NR_setuid32: - ret = get_errno(setuid(arg1)); + ret = get_errno(sys_setuid(arg1)); break; #endif #ifdef TARGET_NR_setgid32 case TARGET_NR_setgid32: - ret = get_errno(setgid(arg1)); + ret = get_errno(sys_setgid(arg1)); break; #endif #ifdef TARGET_NR_setfsuid32 -- cgit 1.4.1