summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--tools/virtiofsd/passthrough_ll.c224
-rw-r--r--tools/virtiofsd/passthrough_seccomp.c2
2 files changed, 150 insertions, 76 deletions
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 5fb36d9407..147b59338a 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -459,17 +459,17 @@ static void lo_map_remove(struct lo_map *map, size_t key)
 }
 
 /* Assumes lo->mutex is held */
-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
+static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd)
 {
     struct lo_map_elem *elem;
 
-    elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
+    elem = lo_map_alloc_elem(&lo->fd_map);
     if (!elem) {
         return -1;
     }
 
     elem->fd = fd;
-    return elem - lo_data(req)->fd_map.elems;
+    return elem - lo->fd_map.elems;
 }
 
 /* Assumes lo->mutex is held */
@@ -555,6 +555,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino)
     return fd;
 }
 
+/*
+ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a
+ * regular file or a directory.
+ *
+ * Use this helper function instead of raw openat(2) to prevent security issues
+ * when a malicious client opens special files such as block device nodes.
+ * Symlink inodes are also rejected since symlinks must already have been
+ * traversed on the client side.
+ */
+static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode,
+                         int open_flags)
+{
+    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
+    int fd;
+
+    if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
+        return -EBADF;
+    }
+
+    /*
+     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
+     * that the inode is not a special file but if an external process races
+     * with us then symlinks are traversed here. It is not possible to escape
+     * the shared directory since it is mounted as "/" though.
+     */
+    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
+    if (fd < 0) {
+        return -errno;
+    }
+    return fd;
+}
+
 static void lo_init(void *userdata, struct fuse_conn_info *conn)
 {
     struct lo_data *lo = (struct lo_data *)userdata;
@@ -684,9 +716,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
         if (fi) {
             truncfd = fd;
         } else {
-            sprintf(procname, "%i", ifd);
-            truncfd = openat(lo->proc_self_fd, procname, O_RDWR);
+            truncfd = lo_inode_open(lo, inode, O_RDWR);
             if (truncfd < 0) {
+                errno = -truncfd;
                 goto out_err;
             }
         }
@@ -831,11 +863,13 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
 }
 
 /*
- * Increments nlookup and caller must release refcount using
- * lo_inode_put(&parent).
+ * Increments nlookup on the inode on success. unref_inode_lolocked() must be
+ * called eventually to decrement nlookup again. If inodep is non-NULL, the
+ * inode pointer is stored and the caller must call lo_inode_put().
  */
 static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
-                        struct fuse_entry_param *e)
+                        struct fuse_entry_param *e,
+                        struct lo_inode **inodep)
 {
     int newfd;
     int res;
@@ -845,6 +879,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
     struct lo_inode *inode = NULL;
     struct lo_inode *dir = lo_inode(req, parent);
 
+    if (inodep) {
+        *inodep = NULL; /* in case there is an error */
+    }
+
     /*
      * name_to_handle_at() and open_by_handle_at() can reach here with fuse
      * mount point in guest, but we don't have its inode info in the
@@ -913,7 +951,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
         pthread_mutex_unlock(&lo->mutex);
     }
     e->ino = inode->fuse_ino;
-    lo_inode_put(lo, &inode);
+
+    /* Transfer ownership of inode pointer to caller or drop it */
+    if (inodep) {
+        *inodep = inode;
+    } else {
+        lo_inode_put(lo, &inode);
+    }
+
     lo_inode_put(lo, &dir);
 
     fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
@@ -948,7 +993,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
         return;
     }
 
-    err = lo_do_lookup(req, parent, name, &e);
+    err = lo_do_lookup(req, parent, name, &e, NULL);
     if (err) {
         fuse_reply_err(req, err);
     } else {
@@ -1056,7 +1101,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
         goto out;
     }
 
-    saverr = lo_do_lookup(req, parent, name, &e);
+    saverr = lo_do_lookup(req, parent, name, &e, NULL);
     if (saverr) {
         goto out;
     }
@@ -1534,7 +1579,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
 
         if (plus) {
             if (!is_dot_or_dotdot(name)) {
-                err = lo_do_lookup(req, ino, name, &e);
+                err = lo_do_lookup(req, ino, name, &e, NULL);
                 if (err) {
                     goto error;
                 }
@@ -1651,12 +1696,52 @@ static void update_open_flags(int writeback, int allow_direct_io,
     }
 }
 
+/*
+ * Open a regular file, set up an fd mapping, and fill out the struct
+ * fuse_file_info for it. If existing_fd is not negative, use that fd instead
+ * opening a new one. Takes ownership of existing_fd.
+ *
+ * Returns 0 on success or a positive errno.
+ */
+static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
+                      int existing_fd, struct fuse_file_info *fi)
+{
+    ssize_t fh;
+    int fd = existing_fd;
+
+    update_open_flags(lo->writeback, lo->allow_direct_io, fi);
+
+    if (fd < 0) {
+        fd = lo_inode_open(lo, inode, fi->flags);
+        if (fd < 0) {
+            return -fd;
+        }
+    }
+
+    pthread_mutex_lock(&lo->mutex);
+    fh = lo_add_fd_mapping(lo, fd);
+    pthread_mutex_unlock(&lo->mutex);
+    if (fh == -1) {
+        close(fd);
+        return ENOMEM;
+    }
+
+    fi->fh = fh;
+    if (lo->cache == CACHE_NONE) {
+        fi->direct_io = 1;
+    } else if (lo->cache == CACHE_ALWAYS) {
+        fi->keep_cache = 1;
+    }
+    return 0;
+}
+
 static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
                       mode_t mode, struct fuse_file_info *fi)
 {
-    int fd;
+    int fd = -1;
     struct lo_data *lo = lo_data(req);
     struct lo_inode *parent_inode;
+    struct lo_inode *inode = NULL;
     struct fuse_entry_param e;
     int err;
     struct lo_cred old = {};
@@ -1682,36 +1767,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
 
     update_open_flags(lo->writeback, lo->allow_direct_io, fi);
 
-    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
-                mode);
+    /* Try to create a new file but don't open existing files */
+    fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode);
     err = fd == -1 ? errno : 0;
-    lo_restore_cred(&old);
 
-    if (!err) {
-        ssize_t fh;
+    lo_restore_cred(&old);
 
-        pthread_mutex_lock(&lo->mutex);
-        fh = lo_add_fd_mapping(req, fd);
-        pthread_mutex_unlock(&lo->mutex);
-        if (fh == -1) {
-            close(fd);
-            err = ENOMEM;
-            goto out;
-        }
+    /* Ignore the error if file exists and O_EXCL was not given */
+    if (err && (err != EEXIST || (fi->flags & O_EXCL))) {
+        goto out;
+    }
 
-        fi->fh = fh;
-        err = lo_do_lookup(req, parent, name, &e);
+    err = lo_do_lookup(req, parent, name, &e, &inode);
+    if (err) {
+        goto out;
     }
-    if (lo->cache == CACHE_NONE) {
-        fi->direct_io = 1;
-    } else if (lo->cache == CACHE_ALWAYS) {
-        fi->keep_cache = 1;
+
+    err = lo_do_open(lo, inode, fd, fi);
+    fd = -1; /* lo_do_open() takes ownership of fd */
+    if (err) {
+        /* Undo lo_do_lookup() nlookup ref */
+        unref_inode_lolocked(lo, inode, 1);
     }
 
 out:
+    lo_inode_put(lo, &inode);
     lo_inode_put(lo, &parent_inode);
 
     if (err) {
+        if (fd >= 0) {
+            close(fd);
+        }
+
         fuse_reply_err(req, err);
     } else {
         fuse_reply_create(req, &e, fi);
@@ -1725,7 +1812,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
                                                       pid_t pid, int *err)
 {
     struct lo_inode_plock *plock;
-    char procname[64];
     int fd;
 
     plock =
@@ -1742,12 +1828,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
     }
 
     /* Open another instance of file which can be used for ofd locks. */
-    sprintf(procname, "%i", inode->fd);
-
     /* TODO: What if file is not writable? */
-    fd = openat(lo->proc_self_fd, procname, O_RDWR);
-    if (fd == -1) {
-        *err = errno;
+    fd = lo_inode_open(lo, inode, O_RDWR);
+    if (fd < 0) {
+        *err = -fd;
         free(plock);
         return NULL;
     }
@@ -1892,38 +1976,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
 
 static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
 {
-    int fd;
-    ssize_t fh;
-    char buf[64];
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode = lo_inode(req, ino);
+    int err;
 
     fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
              fi->flags);
 
-    update_open_flags(lo->writeback, lo->allow_direct_io, fi);
-
-    sprintf(buf, "%i", lo_fd(req, ino));
-    fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
-    if (fd == -1) {
-        return (void)fuse_reply_err(req, errno);
-    }
-
-    pthread_mutex_lock(&lo->mutex);
-    fh = lo_add_fd_mapping(req, fd);
-    pthread_mutex_unlock(&lo->mutex);
-    if (fh == -1) {
-        close(fd);
-        fuse_reply_err(req, ENOMEM);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
         return;
     }
 
-    fi->fh = fh;
-    if (lo->cache == CACHE_NONE) {
-        fi->direct_io = 1;
-    } else if (lo->cache == CACHE_ALWAYS) {
-        fi->keep_cache = 1;
+    err = lo_do_open(lo, inode, -1, fi);
+    lo_inode_put(lo, &inode);
+    if (err) {
+        fuse_reply_err(req, err);
+    } else {
+        fuse_reply_open(req, fi);
     }
-    fuse_reply_open(req, fi);
 }
 
 static void lo_release(fuse_req_t req, fuse_ino_t ino,
@@ -1982,39 +2053,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
 static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
                      struct fuse_file_info *fi)
 {
+    struct lo_inode *inode = lo_inode(req, ino);
+    struct lo_data *lo = lo_data(req);
     int res;
     int fd;
-    char *buf;
 
     fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
              (void *)fi);
 
-    if (!fi) {
-        struct lo_data *lo = lo_data(req);
-
-        res = asprintf(&buf, "%i", lo_fd(req, ino));
-        if (res == -1) {
-            return (void)fuse_reply_err(req, errno);
-        }
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
 
-        fd = openat(lo->proc_self_fd, buf, O_RDWR);
-        free(buf);
-        if (fd == -1) {
-            return (void)fuse_reply_err(req, errno);
+    if (!fi) {
+        fd = lo_inode_open(lo, inode, O_RDWR);
+        if (fd < 0) {
+            res = -fd;
+            goto out;
         }
     } else {
         fd = lo_fi_fd(req, fi);
     }
 
     if (datasync) {
-        res = fdatasync(fd);
+        res = fdatasync(fd) == -1 ? errno : 0;
     } else {
-        res = fsync(fd);
+        res = fsync(fd) == -1 ? errno : 0;
     }
     if (!fi) {
         close(fd);
     }
-    fuse_reply_err(req, res == -1 ? errno : 0);
+out:
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, res);
 }
 
 static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index a60d7da4b4..ea852e2e33 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -65,6 +65,7 @@ static const int syscall_whitelist[] = {
     SCMP_SYS(linkat),
     SCMP_SYS(listxattr),
     SCMP_SYS(lseek),
+    SCMP_SYS(_llseek), /* For POWER */
     SCMP_SYS(madvise),
     SCMP_SYS(mkdirat),
     SCMP_SYS(mknodat),
@@ -88,6 +89,7 @@ static const int syscall_whitelist[] = {
     SCMP_SYS(renameat),
     SCMP_SYS(renameat2),
     SCMP_SYS(removexattr),
+    SCMP_SYS(restart_syscall),
     SCMP_SYS(rt_sigaction),
     SCMP_SYS(rt_sigprocmask),
     SCMP_SYS(rt_sigreturn),