summary refs log tree commit diff stats
path: root/linux-user/syscall.c
diff options
context:
space:
mode:
Diffstat (limited to 'linux-user/syscall.c')
-rw-r--r--linux-user/syscall.c241
1 files changed, 187 insertions, 54 deletions
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index ca06943f3b..7aa2c1d720 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -112,8 +112,56 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 
 #include "qemu.h"
 
-#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
-    CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
+#ifndef CLONE_IO
+#define CLONE_IO                0x80000000      /* Clone io context */
+#endif
+
+/* We can't directly call the host clone syscall, because this will
+ * badly confuse libc (breaking mutexes, for example). So we must
+ * divide clone flags into:
+ *  * flag combinations that look like pthread_create()
+ *  * flag combinations that look like fork()
+ *  * flags we can implement within QEMU itself
+ *  * flags we can't support and will return an error for
+ */
+/* For thread creation, all these flags must be present; for
+ * fork, none must be present.
+ */
+#define CLONE_THREAD_FLAGS                              \
+    (CLONE_VM | CLONE_FS | CLONE_FILES |                \
+     CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM)
+
+/* These flags are ignored:
+ * CLONE_DETACHED is now ignored by the kernel;
+ * CLONE_IO is just an optimisation hint to the I/O scheduler
+ */
+#define CLONE_IGNORED_FLAGS                     \
+    (CLONE_DETACHED | CLONE_IO)
+
+/* Flags for fork which we can implement within QEMU itself */
+#define CLONE_OPTIONAL_FORK_FLAGS               \
+    (CLONE_SETTLS | CLONE_PARENT_SETTID |       \
+     CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID)
+
+/* Flags for thread creation which we can implement within QEMU itself */
+#define CLONE_OPTIONAL_THREAD_FLAGS                             \
+    (CLONE_SETTLS | CLONE_PARENT_SETTID |                       \
+     CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID | CLONE_PARENT)
+
+#define CLONE_INVALID_FORK_FLAGS                                        \
+    (~(CSIGNAL | CLONE_OPTIONAL_FORK_FLAGS | CLONE_IGNORED_FLAGS))
+
+#define CLONE_INVALID_THREAD_FLAGS                                      \
+    (~(CSIGNAL | CLONE_THREAD_FLAGS | CLONE_OPTIONAL_THREAD_FLAGS |     \
+       CLONE_IGNORED_FLAGS))
+
+/* CLONE_VFORK is special cased early in do_fork(). The other flag bits
+ * have almost all been allocated. We cannot support any of
+ * CLONE_NEWNS, CLONE_NEWCGROUP, CLONE_NEWUTS, CLONE_NEWIPC,
+ * CLONE_NEWUSER, CLONE_NEWPID, CLONE_NEWNET, CLONE_PTRACE, CLONE_UNTRACED.
+ * The checks against the invalid thread masks above will catch these.
+ * (The one remaining unallocated bit is 0x1000 which used to be CLONE_PID.)
+ */
 
 //#define DEBUG
 /* Define DEBUG_ERESTARTSYS to force every syscall to be restarted
@@ -520,16 +568,7 @@ static int sys_getcwd1(char *buf, size_t size)
 }
 
 #ifdef TARGET_NR_utimensat
-#ifdef CONFIG_UTIMENSAT
-static int sys_utimensat(int dirfd, const char *pathname,
-    const struct timespec times[2], int flags)
-{
-    if (pathname == NULL)
-        return futimens(dirfd, times);
-    else
-        return utimensat(dirfd, pathname, times, flags);
-}
-#elif defined(__NR_utimensat)
+#if defined(__NR_utimensat)
 #define __NR_sys_utimensat __NR_utimensat
 _syscall4(int,sys_utimensat,int,dirfd,const char *,pathname,
           const struct timespec *,tsp,int,flags)
@@ -1405,6 +1444,29 @@ static abi_long do_select(int n,
 
     return ret;
 }
+
+#if defined(TARGET_WANT_OLD_SYS_SELECT)
+static abi_long do_old_select(abi_ulong arg1)
+{
+    struct target_sel_arg_struct *sel;
+    abi_ulong inp, outp, exp, tvp;
+    long nsel;
+
+    if (!lock_user_struct(VERIFY_READ, sel, arg1, 1)) {
+        return -TARGET_EFAULT;
+    }
+
+    nsel = tswapal(sel->n);
+    inp = tswapal(sel->inp);
+    outp = tswapal(sel->outp);
+    exp = tswapal(sel->exp);
+    tvp = tswapal(sel->tvp);
+
+    unlock_user_struct(sel, arg1, 0);
+
+    return do_select(nsel, inp, outp, exp, tvp);
+}
+#endif
 #endif
 
 static abi_long do_pipe2(int host_pipe[], int flags)
@@ -3119,7 +3181,7 @@ static abi_long do_getsockopt(int sockfd, int level, int optname,
 }
 
 static struct iovec *lock_iovec(int type, abi_ulong target_addr,
-                                int count, int copy)
+                                abi_ulong count, int copy)
 {
     struct target_iovec *target_vec;
     struct iovec *vec;
@@ -3132,7 +3194,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr,
         errno = 0;
         return NULL;
     }
-    if (count < 0 || count > IOV_MAX) {
+    if (count > IOV_MAX) {
         errno = EINVAL;
         return NULL;
     }
@@ -3207,7 +3269,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr,
 }
 
 static void unlock_iovec(struct iovec *vec, abi_ulong target_addr,
-                         int count, int copy)
+                         abi_ulong count, int copy)
 {
     struct target_iovec *target_vec;
     int i;
@@ -3462,7 +3524,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
 {
     abi_long ret, len;
     struct msghdr msg;
-    int count;
+    abi_ulong count;
     struct iovec *vec;
     abi_ulong target_vec;
 
@@ -3472,7 +3534,14 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
         ret = target_to_host_sockaddr(fd, msg.msg_name,
                                       tswapal(msgp->msg_name),
                                       msg.msg_namelen);
-        if (ret) {
+        if (ret == -TARGET_EFAULT) {
+            /* For connected sockets msg_name and msg_namelen must
+             * be ignored, so returning EFAULT immediately is wrong.
+             * Instead, pass a bad msg_name to the host kernel, and
+             * let it decide whether to return EFAULT or not.
+             */
+            msg.msg_name = (void *)-1;
+        } else if (ret) {
             goto out2;
         }
     } else {
@@ -3485,6 +3554,15 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
 
     count = tswapal(msgp->msg_iovlen);
     target_vec = tswapal(msgp->msg_iov);
+
+    if (count > IOV_MAX) {
+        /* sendrcvmsg returns a different errno for this condition than
+         * readv/writev, so we must catch it here before lock_iovec() does.
+         */
+        ret = -TARGET_EMSGSIZE;
+        goto out2;
+    }
+
     vec = lock_iovec(send ? VERIFY_READ : VERIFY_WRITE,
                      target_vec, count, send);
     if (vec == NULL) {
@@ -3525,7 +3603,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp,
             }
             if (!is_error(ret)) {
                 msgp->msg_namelen = tswap32(msg.msg_namelen);
-                if (msg.msg_name != NULL) {
+                if (msg.msg_name != NULL && msg.msg_name != (void *)-1) {
                     ret = host_to_target_sockaddr(tswapal(msgp->msg_name),
                                     msg.msg_name, msg.msg_namelen);
                     if (ret) {
@@ -4568,12 +4646,34 @@ static inline abi_long do_shmctl(int shmid, int cmd, abi_long buf)
     return ret;
 }
 
-static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg)
+#ifndef TARGET_FORCE_SHMLBA
+/* For most architectures, SHMLBA is the same as the page size;
+ * some architectures have larger values, in which case they should
+ * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
+ * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
+ * and defining its own value for SHMLBA.
+ *
+ * The kernel also permits SHMLBA to be set by the architecture to a
+ * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
+ * this means that addresses are rounded to the large size if
+ * SHM_RND is set but addresses not aligned to that size are not rejected
+ * as long as they are at least page-aligned. Since the only architecture
+ * which uses this is ia64 this code doesn't provide for that oddity.
+ */
+static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
+{
+    return TARGET_PAGE_SIZE;
+}
+#endif
+
+static inline abi_ulong do_shmat(CPUArchState *cpu_env,
+                                 int shmid, abi_ulong shmaddr, int shmflg)
 {
     abi_long raddr;
     void *host_raddr;
     struct shmid_ds shm_info;
     int i,ret;
+    abi_ulong shmlba;
 
     /* find out the length of the shared memory segment */
     ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
@@ -4582,6 +4682,16 @@ static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg)
         return ret;
     }
 
+    shmlba = target_shmlba(cpu_env);
+
+    if (shmaddr & (shmlba - 1)) {
+        if (shmflg & SHM_RND) {
+            shmaddr &= ~(shmlba - 1);
+        } else {
+            return -TARGET_EINVAL;
+        }
+    }
+
     mmap_lock();
 
     if (shmaddr)
@@ -4640,7 +4750,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
 #ifdef TARGET_NR_ipc
 /* ??? This only works with linear mappings.  */
 /* do_ipc() must return target values and target errnos. */
-static abi_long do_ipc(unsigned int call, abi_long first,
+static abi_long do_ipc(CPUArchState *cpu_env,
+                       unsigned int call, abi_long first,
                        abi_long second, abi_long third,
                        abi_long ptr, abi_long fifth)
 {
@@ -4709,7 +4820,7 @@ static abi_long do_ipc(unsigned int call, abi_long first,
         default:
         {
             abi_ulong raddr;
-            raddr = do_shmat(first, ptr, second);
+            raddr = do_shmat(cpu_env, first, ptr, second);
             if (is_error(raddr))
                 return get_errno(raddr);
             if (put_user_ual(raddr, third))
@@ -4994,13 +5105,18 @@ static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd,
 
     guest_data = arg + host_dm->data_start;
     if ((guest_data - arg) < 0) {
-        ret = -EINVAL;
+        ret = -TARGET_EINVAL;
         goto out;
     }
     guest_data_size = host_dm->data_size - host_dm->data_start;
     host_data = (char*)host_dm + host_dm->data_start;
 
     argptr = lock_user(VERIFY_READ, guest_data, guest_data_size, 1);
+    if (!argptr) {
+        ret = -TARGET_EFAULT;
+        goto out;
+    }
+
     switch (ie->host_cmd) {
     case DM_REMOVE_ALL:
     case DM_LIST_DEVICES:
@@ -5966,9 +6082,10 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
     TaskState *ts;
     CPUState *new_cpu;
     CPUArchState *new_env;
-    unsigned int nptl_flags;
     sigset_t sigmask;
 
+    flags &= ~CLONE_IGNORED_FLAGS;
+
     /* Emulate vfork() with fork() */
     if (flags & CLONE_VFORK)
         flags &= ~(CLONE_VFORK | CLONE_VM);
@@ -5978,6 +6095,11 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         new_thread_info info;
         pthread_attr_t attr;
 
+        if (((flags & CLONE_THREAD_FLAGS) != CLONE_THREAD_FLAGS) ||
+            (flags & CLONE_INVALID_THREAD_FLAGS)) {
+            return -TARGET_EINVAL;
+        }
+
         ts = g_new0(TaskState, 1);
         init_task_state(ts);
         /* we create a new CPU instance. */
@@ -5989,15 +6111,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         ts->bprm = parent_ts->bprm;
         ts->info = parent_ts->info;
         ts->signal_mask = parent_ts->signal_mask;
-        nptl_flags = flags;
-        flags &= ~CLONE_NPTL_FLAGS2;
 
-        if (nptl_flags & CLONE_CHILD_CLEARTID) {
+        if (flags & CLONE_CHILD_CLEARTID) {
             ts->child_tidptr = child_tidptr;
         }
 
-        if (nptl_flags & CLONE_SETTLS)
+        if (flags & CLONE_SETTLS) {
             cpu_set_tls (new_env, newtls);
+        }
 
         /* Grab a mutex so that thread setup appears atomic.  */
         pthread_mutex_lock(&clone_lock);
@@ -6007,10 +6128,12 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         pthread_mutex_lock(&info.mutex);
         pthread_cond_init(&info.cond, NULL);
         info.env = new_env;
-        if (nptl_flags & CLONE_CHILD_SETTID)
+        if (flags & CLONE_CHILD_SETTID) {
             info.child_tidptr = child_tidptr;
-        if (nptl_flags & CLONE_PARENT_SETTID)
+        }
+        if (flags & CLONE_PARENT_SETTID) {
             info.parent_tidptr = parent_tidptr;
+        }
 
         ret = pthread_attr_init(&attr);
         ret = pthread_attr_setstacksize(&attr, NEW_STACK_SIZE);
@@ -6029,8 +6152,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
             /* Wait for the child to initialize.  */
             pthread_cond_wait(&info.cond, &info.mutex);
             ret = info.tid;
-            if (flags & CLONE_PARENT_SETTID)
-                put_user_u32(ret, parent_tidptr);
         } else {
             ret = -1;
         }
@@ -6040,7 +6161,12 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp,
         pthread_mutex_unlock(&clone_lock);
     } else {
         /* if no CLONE_VM, we consider it is a fork */
-        if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) {
+        if (flags & CLONE_INVALID_FORK_FLAGS) {
+            return -TARGET_EINVAL;
+        }
+
+        /* We can't support custom termination signals */
+        if ((flags & CSIGNAL) != TARGET_SIGCHLD) {
             return -TARGET_EINVAL;
         }
 
@@ -8565,24 +8691,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #if defined(TARGET_NR_select)
     case TARGET_NR_select:
-#if defined(TARGET_S390X) || defined(TARGET_ALPHA)
-        ret = do_select(arg1, arg2, arg3, arg4, arg5);
+#if defined(TARGET_WANT_NI_OLD_SELECT)
+        /* some architectures used to have old_select here
+         * but now ENOSYS it.
+         */
+        ret = -TARGET_ENOSYS;
+#elif defined(TARGET_WANT_OLD_SYS_SELECT)
+        ret = do_old_select(arg1);
 #else
-        {
-            struct target_sel_arg_struct *sel;
-            abi_ulong inp, outp, exp, tvp;
-            long nsel;
-
-            if (!lock_user_struct(VERIFY_READ, sel, arg1, 1))
-                goto efault;
-            nsel = tswapal(sel->n);
-            inp = tswapal(sel->inp);
-            outp = tswapal(sel->outp);
-            exp = tswapal(sel->exp);
-            tvp = tswapal(sel->tvp);
-            unlock_user_struct(sel, arg1, 0);
-            ret = do_select(nsel, inp, outp, exp, tvp);
-        }
+        ret = do_select(arg1, arg2, arg3, arg4, arg5);
 #endif
         break;
 #endif
@@ -9292,8 +9409,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         break;
 #ifdef TARGET_NR_ipc
     case TARGET_NR_ipc:
-	ret = do_ipc(arg1, arg2, arg3, arg4, arg5, arg6);
-	break;
+        ret = do_ipc(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+        break;
 #endif
 #ifdef TARGET_NR_semget
     case TARGET_NR_semget:
@@ -9342,7 +9459,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_shmat
     case TARGET_NR_shmat:
-        ret = do_shmat(arg1, arg2, arg3);
+        ret = do_shmat(cpu_env, arg1, arg2, arg3);
         break;
 #endif
 #ifdef TARGET_NR_shmdt
@@ -9654,6 +9771,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             pfd = NULL;
             target_pfd = NULL;
             if (nfds) {
+                if (nfds > (INT_MAX / sizeof(struct target_pollfd))) {
+                    ret = -TARGET_EINVAL;
+                    break;
+                }
+
                 target_pfd = lock_user(VERIFY_WRITE, arg1,
                                        sizeof(struct target_pollfd) * nfds, 1);
                 if (!target_pfd) {
@@ -10527,7 +10649,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
                     info.si_code = si_code;
                     info._sifields._sigfault._addr
                         = ((CPUArchState *)cpu_env)->pc;
-                    queue_signal((CPUArchState *)cpu_env, info.si_signo, &info);
+                    queue_signal((CPUArchState *)cpu_env, info.si_signo,
+                                 QEMU_SI_FAULT, &info);
                 }
             }
             break;
@@ -11259,6 +11382,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 
     case TARGET_NR_mq_unlink:
         p = lock_user_string(arg1 - 1);
+        if (!p) {
+            ret = -TARGET_EFAULT;
+            break;
+        }
         ret = get_errno(mq_unlink(p));
         unlock_user (p, arg1, 0);
         break;
@@ -11494,6 +11621,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         int maxevents = arg3;
         int timeout = arg4;
 
+        if (maxevents <= 0 || maxevents > TARGET_EP_MAX_EVENTS) {
+            ret = -TARGET_EINVAL;
+            break;
+        }
+
         target_ep = lock_user(VERIFY_WRITE, arg2,
                               maxevents * sizeof(struct target_epoll_event), 1);
         if (!target_ep) {
@@ -11606,7 +11738,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             info.si_errno = 0;
             info.si_code = TARGET_SEGV_MAPERR;
             info._sifields._sigfault._addr = arg6;
-            queue_signal((CPUArchState *)cpu_env, info.si_signo, &info);
+            queue_signal((CPUArchState *)cpu_env, info.si_signo,
+                         QEMU_SI_FAULT, &info);
             ret = 0xdeadbeef;
 
         }