diff options
Diffstat (limited to 'tools/virtiofsd/passthrough_ll.c')
| -rw-r--r-- | tools/virtiofsd/passthrough_ll.c | 467 |
1 files changed, 438 insertions, 29 deletions
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index b3d0674f6d..dfa2fc250d 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -173,10 +173,15 @@ struct lo_data { /* An O_PATH file descriptor to /proc/self/fd/ */ int proc_self_fd; + /* An O_PATH file descriptor to /proc/self/task/ */ + int proc_self_task; int user_killpriv_v2, killpriv_v2; /* If set, virtiofsd is responsible for setting umask during creation */ bool change_umask; int user_posix_acl, posix_acl; + /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */ + bool use_fscreate; + int user_security_label; }; static const struct fuse_opt lo_opts[] = { @@ -211,6 +216,8 @@ static const struct fuse_opt lo_opts[] = { { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 }, { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 }, { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 }, + { "security_label", offsetof(struct lo_data, user_security_label), 1 }, + { "no_security_label", offsetof(struct lo_data, user_security_label), 0 }, FUSE_OPT_END }; static bool use_syslog = false; @@ -230,6 +237,11 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, static int xattr_map_client(const struct lo_data *lo, const char *client_name, char **out_name); +#define FCHDIR_NOFAIL(fd) do { \ + int fchdir_res = fchdir(fd); \ + assert(fchdir_res == 0); \ + } while (0) + static bool is_dot_or_dotdot(const char *name) { return name[0] == '.' && @@ -257,6 +269,70 @@ static struct lo_data *lo_data(fuse_req_t req) } /* + * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With + * selinux=0, read from fscreate returns -EINVAL. + * + * TODO: Link with libselinux and use is_selinux_enabled() instead down + * the line. It probably will be more reliable indicator. + */ +static bool is_fscreate_usable(struct lo_data *lo) +{ + char procname[64]; + int fscreate_fd; + size_t bytes_read; + + sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); + fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR); + if (fscreate_fd == -1) { + return false; + } + + bytes_read = read(fscreate_fd, procname, 64); + close(fscreate_fd); + if (bytes_read == -1) { + return false; + } + return true; +} + +/* Helpers to set/reset fscreate */ +static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx, + size_t ctxlen, int *fd) +{ + char procname[64]; + int fscreate_fd, err = 0; + size_t written; + + sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); + fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY); + err = fscreate_fd == -1 ? errno : 0; + if (err) { + return err; + } + + written = write(fscreate_fd, ctx, ctxlen); + err = written == -1 ? errno : 0; + if (err) { + goto out; + } + + *fd = fscreate_fd; + return 0; +out: + close(fscreate_fd); + return err; +} + +static void close_reset_proc_fscreate(int fd) +{ + if ((write(fd, NULL, 0)) == -1) { + fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno); + } + close(fd); + return; +} + +/* * Load capng's state from our saved state if the current thread * hadn't previously been loaded. * returns 0 on success @@ -735,6 +811,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n"); conn->want &= ~FUSE_CAP_POSIX_ACL; } + + if (lo->user_security_label == 1) { + if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) { + fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label." + " kernel does not support FUSE_SECURITY_CTX capability.\n"); + } + conn->want |= FUSE_CAP_SECURITY_CTX; + } else { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n"); + conn->want &= ~FUSE_CAP_SECURITY_CTX; + } } static void lo_getattr(fuse_req_t req, fuse_ino_t ino, @@ -1284,16 +1371,103 @@ static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask, } } +static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir, + const char *name, const char *secctx_name) +{ + int path_fd, err; + char procname[64]; + struct lo_data *lo = lo_data(req); + + if (!req->secctx.ctxlen) { + return 0; + } + + /* Open newly created element with O_PATH */ + path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); + err = path_fd == -1 ? errno : 0; + if (err) { + return err; + } + sprintf(procname, "%i", path_fd); + FCHDIR_NOFAIL(lo->proc_self_fd); + /* Set security context. This is not atomic w.r.t file creation */ + err = setxattr(procname, secctx_name, req->secctx.ctx, req->secctx.ctxlen, + 0); + if (err) { + err = errno; + } + FCHDIR_NOFAIL(lo->root.fd); + close(path_fd); + return err; +} + +static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir, + const char *name, mode_t mode, dev_t rdev, + const char *link) +{ + int err, fscreate_fd = -1; + const char *secctx_name = req->secctx.name; + struct lo_cred old = {}; + struct lo_data *lo = lo_data(req); + char *mapped_name = NULL; + bool secctx_enabled = req->secctx.ctxlen; + bool do_fscreate = false; + + if (secctx_enabled && lo->xattrmap) { + err = xattr_map_client(lo, req->secctx.name, &mapped_name); + if (err < 0) { + return -err; + } + secctx_name = mapped_name; + } + + /* + * If security xattr has not been remapped and selinux is enabled on + * host, set fscreate and no need to do a setxattr() after file creation + */ + if (secctx_enabled && !mapped_name && lo->use_fscreate) { + do_fscreate = true; + err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, + &fscreate_fd); + if (err) { + goto out; + } + } + + err = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode)); + if (err) { + goto out; + } + + err = mknod_wrapper(dir->fd, name, link, mode, rdev); + err = err == -1 ? errno : 0; + lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode)); + if (err) { + goto out; + } + + if (!do_fscreate) { + err = do_mknod_symlink_secctx(req, dir, name, secctx_name); + if (err) { + unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0); + } + } +out: + if (fscreate_fd != -1) { + close_reset_proc_fscreate(fscreate_fd); + } + g_free(mapped_name); + return err; +} + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, dev_t rdev, const char *link) { - int res; int saverr; struct lo_data *lo = lo_data(req); struct lo_inode *dir; struct fuse_entry_param e; - struct lo_cred old = {}; if (is_empty(name)) { fuse_reply_err(req, ENOENT); @@ -1311,21 +1485,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, return; } - saverr = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode)); + saverr = do_mknod_symlink(req, dir, name, mode, rdev, link); if (saverr) { goto out; } - res = mknod_wrapper(dir->fd, name, link, mode, rdev); - - saverr = errno; - - lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode)); - - if (res == -1) { - goto out; - } - saverr = lo_do_lookup(req, parent, name, &e, NULL); if (saverr) { goto out; @@ -2001,6 +2165,190 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, return 0; } +static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, int *open_fd, + bool tmpfile) +{ + int err, fd; + struct lo_cred old = {}; + struct lo_data *lo = lo_data(req); + int flags; + + if (tmpfile) { + flags = fi->flags | O_TMPFILE; + /* + * Don't use O_EXCL as we want to link file later. Also reset O_CREAT + * otherwise openat() returns -EINVAL. + */ + flags &= ~(O_CREAT | O_EXCL); + + /* O_TMPFILE needs either O_RDWR or O_WRONLY */ + if ((flags & O_ACCMODE) == O_RDONLY) { + flags |= O_RDWR; + } + } else { + flags = fi->flags | O_CREAT | O_EXCL; + } + + err = lo_change_cred(req, &old, lo->change_umask); + if (err) { + return err; + } + + /* Try to create a new file but don't open existing files */ + fd = openat(parent_inode->fd, name, flags, mode); + err = fd == -1 ? errno : 0; + lo_restore_cred(&old, lo->change_umask); + if (!err) { + *open_fd = fd; + } + return err; +} + +static int do_create_secctx_fscreate(fuse_req_t req, + struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, int *open_fd) +{ + int err = 0, fd = -1, fscreate_fd = -1; + struct lo_data *lo = lo_data(req); + + err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, + &fscreate_fd); + if (err) { + return err; + } + + err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); + + close_reset_proc_fscreate(fscreate_fd); + if (!err) { + *open_fd = fd; + } + return err; +} + +static int do_create_secctx_tmpfile(fuse_req_t req, + struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, + const char *secctx_name, int *open_fd) +{ + int err, fd = -1; + struct lo_data *lo = lo_data(req); + char procname[64]; + + err = do_create_nosecctx(req, parent_inode, ".", mode, fi, &fd, true); + if (err) { + return err; + } + + err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); + if (err) { + err = errno; + goto out; + } + + /* Security context set on file. Link it in place */ + sprintf(procname, "%d", fd); + FCHDIR_NOFAIL(lo->proc_self_fd); + err = linkat(AT_FDCWD, procname, parent_inode->fd, name, + AT_SYMLINK_FOLLOW); + err = err == -1 ? errno : 0; + FCHDIR_NOFAIL(lo->root.fd); + +out: + if (!err) { + *open_fd = fd; + } else if (fd != -1) { + close(fd); + } + return err; +} + +static int do_create_secctx_noatomic(fuse_req_t req, + struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, + const char *secctx_name, int *open_fd) +{ + int err = 0, fd = -1; + + err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); + if (err) { + goto out; + } + + /* Set security context. This is not atomic w.r.t file creation */ + err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); + err = err == -1 ? errno : 0; +out: + if (!err) { + *open_fd = fd; + } else { + if (fd != -1) { + close(fd); + unlinkat(parent_inode->fd, name, 0); + } + } + return err; +} + +static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode, + const char *name, mode_t mode, + struct fuse_file_info *fi, int *open_fd) +{ + struct lo_data *lo = lo_data(req); + char *mapped_name = NULL; + int err; + const char *ctxname = req->secctx.name; + bool secctx_enabled = req->secctx.ctxlen; + + if (secctx_enabled && lo->xattrmap) { + err = xattr_map_client(lo, req->secctx.name, &mapped_name); + if (err < 0) { + return -err; + } + + ctxname = mapped_name; + } + + if (secctx_enabled) { + /* + * If security.selinux has not been remapped and selinux is enabled, + * use fscreate to set context before file creation. If not, use + * tmpfile method for regular files. Otherwise fallback to + * non-atomic method of file creation and xattr settting. + */ + if (!mapped_name && lo->use_fscreate) { + err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi, + open_fd); + goto out; + } else if (S_ISREG(mode)) { + err = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi, + ctxname, open_fd); + /* + * If filesystem does not support O_TMPFILE, fallback to non-atomic + * method. + */ + if (!err || err != EOPNOTSUPP) { + goto out; + } + } + + err = do_create_secctx_noatomic(req, parent_inode, name, mode, fi, + ctxname, open_fd); + } else { + err = do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd, + false); + } + +out: + g_free(mapped_name); + return err; +} + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, struct fuse_file_info *fi) { @@ -2010,7 +2358,6 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, struct lo_inode *inode = NULL; struct fuse_entry_param e; int err; - struct lo_cred old = {}; fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)" " kill_priv=%d\n", parent, name, fi->kill_priv); @@ -2026,18 +2373,9 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, return; } - err = lo_change_cred(req, &old, lo->change_umask); - if (err) { - goto out; - } - update_open_flags(lo->writeback, lo->allow_direct_io, fi); - /* Try to create a new file but don't open existing files */ - fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); - err = fd == -1 ? errno : 0; - - lo_restore_cred(&old, lo->change_umask); + err = do_lo_create(req, parent_inode, name, mode, fi, &fd); /* Ignore the error if file exists and O_EXCL was not given */ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { @@ -2467,6 +2805,15 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, int res; (void)ino; + if (!(op & LOCK_NB)) { + /* + * Blocking flock can deadlock as there is only one thread + * serving the queue. + */ + fuse_reply_err(req, EOPNOTSUPP); + return; + } + res = flock(lo_fi_fd(req, fi), op); fuse_reply_err(req, res == -1 ? errno : 0); @@ -2842,11 +3189,6 @@ static int xattr_map_server(const struct lo_data *lo, const char *server_name, return -ENODATA; } -#define FCHDIR_NOFAIL(fd) do { \ - int fchdir_res = fchdir(fd); \ - assert(fchdir_res == 0); \ - } while (0) - static bool block_xattr(struct lo_data *lo, const char *name) { /* @@ -3357,6 +3699,49 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, } } +static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode) +{ + int fd, ret = 0; + + fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n", + inode->fuse_ino); + + fd = lo_inode_open(lo, inode, O_RDONLY); + if (fd < 0) { + return -fd; + } + + if (syncfs(fd) < 0) { + ret = errno; + } + + close(fd); + return ret; +} + +static void lo_syncfs(fuse_req_t req, fuse_ino_t ino) +{ + struct lo_data *lo = lo_data(req); + struct lo_inode *inode = lo_inode(req, ino); + int err; + + if (!inode) { + fuse_reply_err(req, EBADF); + return; + } + + err = lo_do_syncfs(lo, inode); + lo_inode_put(lo, &inode); + + /* + * If submounts aren't announced, the client only sends a request to + * sync the root inode. TODO: Track submounts internally and iterate + * over them as well. + */ + + fuse_reply_err(req, err); +} + static void lo_destroy(void *userdata) { struct lo_data *lo = (struct lo_data *)userdata; @@ -3417,6 +3802,7 @@ static struct fuse_lowlevel_ops lo_oper = { .copy_file_range = lo_copy_file_range, #endif .lseek = lo_lseek, + .syncfs = lo_syncfs, .destroy = lo_destroy, }; @@ -3508,6 +3894,15 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) exit(1); } + /* Get the /proc/self/task descriptor */ + lo->proc_self_task = open("/proc/self/task/", O_PATH); + if (lo->proc_self_task == -1) { + fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n"); + exit(1); + } + + lo->use_fscreate = is_fscreate_usable(lo); + /* * We only need /proc/self/fd. Prevent ".." from accessing parent * directories of /proc/self/fd by bind-mounting it over /proc. Since / was @@ -3724,6 +4119,14 @@ static void setup_chroot(struct lo_data *lo) exit(1); } + lo->proc_self_task = open("/proc/self/task", O_PATH); + if (lo->proc_self_fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n"); + exit(1); + } + + lo->use_fscreate = is_fscreate_usable(lo); + /* * Make the shared directory the file system root so that FUSE_OPEN * (lo_open()) cannot escape the shared directory by opening a symlink. @@ -3909,6 +4312,10 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) close(lo->proc_self_fd); } + if (lo->proc_self_task >= 0) { + close(lo->proc_self_task); + } + if (lo->root.fd >= 0) { close(lo->root.fd); } @@ -3936,8 +4343,10 @@ int main(int argc, char *argv[]) .posix_lock = 0, .allow_direct_io = 0, .proc_self_fd = -1, + .proc_self_task = -1, .user_killpriv_v2 = -1, .user_posix_acl = -1, + .user_security_label = -1, }; struct lo_map_elem *root_elem; struct lo_map_elem *reserve_elem; |