32 files changed, 13886 insertions, 14 deletions
diff --git a/.gitignore b/.gitignore
index efad605e1a..bc0a035f9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 /config-target.*
 /config.status
 /config-temp
+/tools/virtiofsd/50-qemu-virtiofsd.json
 /elf2dmp
 /trace-events-all
 /trace/generated-events.h
diff --git a/MAINTAINERS b/MAINTAINERS
index d9798c20c0..f6511d5120 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1595,6 +1595,14 @@ T: git https://github.com/cohuck/qemu.git s390-next
 T: git https://github.com/borntraeger/qemu.git s390-next
 L: qemu-s390x@nongnu.org
 
+virtiofs
+M: Dr. David Alan Gilbert <dgilbert@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
+S: Supported
+F: tools/virtiofsd/*
+F: hw/virtio/vhost-user-fs*
+F: include/hw/virtio/vhost-user-fs.h
+
 virtio-input
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
diff --git a/Makefile b/Makefile
index 9473509112..04c77d3b96 100644
--- a/Makefile
+++ b/Makefile
@@ -327,6 +327,11 @@ HELPERS-y += vhost-user-gpu$(EXESUF)
 vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json
 endif
 
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
+HELPERS-y += virtiofsd$(EXESUF)
+vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
+endif
+
 # Sphinx does not allow building manuals into the same directory as
 # the source files, so if we're doing an in-tree QEMU build we must
 # build the manuals into a subdirectory (and then install them from
@@ -431,6 +436,7 @@ dummy := $(call unnest-vars,, \
                 elf2dmp-obj-y \
                 ivshmem-client-obj-y \
                 ivshmem-server-obj-y \
+                virtiofsd-obj-y \
                 rdmacm-mux-obj-y \
                 libvhost-user-obj-y \
                 vhost-user-scsi-obj-y \
@@ -670,6 +676,12 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad"
 rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
 	$(call LINK, $^)
 
+# relies on Linux-specific syscalls
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
+virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
+	$(call LINK, $^)
+endif
+
 vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a
 	$(call LINK, $^)
 
diff --git a/Makefile.objs b/Makefile.objs
index 7c1e50f9d6..ff396b9209 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -123,6 +123,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/
 rdmacm-mux-obj-y = contrib/rdmacm-mux/
 vhost-user-input-obj-y = contrib/vhost-user-input/
 vhost-user-gpu-obj-y = contrib/vhost-user-gpu/
+virtiofsd-obj-y = tools/virtiofsd/
 
 ######################################################################
 trace-events-subdirs =
diff --git a/configure b/configure
index d91eab4d65..6ab028dd0d 100755
--- a/configure
+++ b/configure
@@ -5197,6 +5197,19 @@ if compile_prog "" "" ; then
     strchrnul=yes
 fi
 
+#########################################
+# check if we have st_atim
+
+st_atim=no
+cat > $TMPC << EOF
+#include <sys/stat.h>
+#include <stddef.h>
+int main(void) { return offsetof(struct stat, st_atim); }
+EOF
+if compile_prog "" "" ; then
+    st_atim=yes
+fi
+
 ##########################################
 # check if trace backend exists
 
@@ -6895,6 +6908,9 @@ fi
 if test "$strchrnul" = "yes" ; then
   echo "HAVE_STRCHRNUL=y" >> $config_host_mak
 fi
+if test "$st_atim" = "yes" ; then
+  echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak
+fi
 if test "$byteswap_h" = "yes" ; then
   echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak
 fi
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index ec27b78ff1..b89bf18501 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
     return vu_message_write(dev, conn_fd, vmsg);
 }
 
+/*
+ * Processes a reply on the slave channel.
+ * Entered with slave_mutex held and releases it before exit.
+ * Returns true if no reply was requested or the reply payload is zero.
+ */
 static bool
 vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
 {
     VhostUserMsg msg_reply;
+    bool result = false;
 
     if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
-        return true;
+        result = true;
+        goto out;
     }
 
     if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
-        return false;
+        goto out;
     }
 
     if (msg_reply.request != vmsg->request) {
         DPRINT("Received unexpected msg type. Expected %d received %d",
                vmsg->request, msg_reply.request);
-        return false;
+        goto out;
     }
 
-    return msg_reply.payload.u64 == 0;
+    result = msg_reply.payload.u64 == 0;
+
+out:
+    pthread_mutex_unlock(&dev->slave_mutex);
+    return result;
 }
 
 /* Kick the log_call_fd if required. */
@@ -554,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
 }
 
 static bool
+map_ring(VuDev *dev, VuVirtq *vq)
+{
+    vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
+    vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
+    vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
+    /* Trace the pointers just derived from the addresses saved in vq->vra. */
+    DPRINT("Setting virtq addresses:\n");
+    DPRINT("    vring_desc  at %p\n", vq->vring.desc);
+    DPRINT("    vring_used  at %p\n", vq->vring.used);
+    DPRINT("    vring_avail at %p\n", vq->vring.avail);
+    /* Returns true on FAILURE, i.e. when any of the three pointers is NULL. */
+    return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
+}
+
+static bool
 vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
 {
     int i;
@@ -756,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
         close(vmsg->fds[i]);
     }
 
+    /* Re-translate rings that were already mapped before this table update. */
+    for (i = 0; i < dev->max_queues; i++) {
+        /* map_ring() returns true on failure. */
+        if (dev->vq[i].vring.desc && map_ring(dev, &dev->vq[i])) {
+            vu_panic(dev, "remapping queue %d during setmemtable", i);
+        }
+    }
+
     return false;
 }
 
@@ -842,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
     DPRINT("    avail_user_addr:  0x%016" PRIx64 "\n", vra->avail_user_addr);
     DPRINT("    log_guest_addr:   0x%016" PRIx64 "\n", vra->log_guest_addr);
 
+    vq->vra = *vra;
     vq->vring.flags = vra->flags;
-    vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
-    vq->vring.used = qva_to_va(dev, vra->used_user_addr);
-    vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
     vq->vring.log_guest_addr = vra->log_guest_addr;
 
-    DPRINT("Setting virtq addresses:\n");
-    DPRINT("    vring_desc  at %p\n", vq->vring.desc);
-    DPRINT("    vring_used  at %p\n", vq->vring.used);
-    DPRINT("    vring_avail at %p\n", vq->vring.avail);
 
-    if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
+    if (map_ring(dev, vq)) {
         vu_panic(dev, "Invalid vring_addr message");
         return false;
     }
@@ -1105,10 +1133,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
         return false;
     }
 
+    pthread_mutex_lock(&dev->slave_mutex);
     if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
+        pthread_mutex_unlock(&dev->slave_mutex);
         return false;
     }
 
+    /* Also unlocks the slave_mutex */
     return vu_process_message_reply(dev, &vmsg);
 }
 
@@ -1628,6 +1659,7 @@ vu_deinit(VuDev *dev)
         close(dev->slave_fd);
         dev->slave_fd = -1;
     }
+    pthread_mutex_destroy(&dev->slave_mutex);
 
     if (dev->sock != -1) {
         close(dev->sock);
@@ -1663,6 +1695,7 @@ vu_init(VuDev *dev,
     dev->remove_watch = remove_watch;
     dev->iface = iface;
     dev->log_call_fd = -1;
+    pthread_mutex_init(&dev->slave_mutex, NULL);
     dev->slave_fd = -1;
     dev->max_queues = max_queues;
 
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 46b600799b..5cb7708559 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -19,6 +19,7 @@
 #include <stddef.h>
 #include <sys/poll.h>
 #include <linux/vhost.h>
+#include <pthread.h>
 #include "standard-headers/linux/virtio_ring.h"
 
 /* Based on qemu/hw/virtio/vhost-user.c */
@@ -326,6 +327,9 @@ typedef struct VuVirtq {
     int err_fd;
     unsigned int enable;
     bool started;
+
+    /* Guest addresses of our ring */
+    struct vhost_vring_addr vra;
 } VuVirtq;
 
 enum VuWatchCondtion {
@@ -355,6 +359,8 @@ struct VuDev {
     VuVirtq *vq;
     VuDevInflightInfo inflight_info;
     int log_call_fd;
+    /* Must be held while using slave_fd */
+    pthread_mutex_t slave_mutex;
     int slave_fd;
     uint64_t log_size;
     uint8_t *log_table;
diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json
index ce0ef74db5..ef8ac5941f 100644
--- a/docs/interop/vhost-user.json
+++ b/docs/interop/vhost-user.json
@@ -31,6 +31,7 @@
 # @rproc-serial: virtio remoteproc serial link
 # @scsi: virtio scsi
 # @vsock: virtio vsock transport
+# @fs: virtio fs (since 4.2)
 #
 # Since: 4.0
 ##
@@ -50,7 +51,8 @@
       'rpmsg',
       'rproc-serial',
       'scsi',
-      'vsock'
+      'vsock',
+      'fs'
   ]
 }
 
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index d27a10fcc6..2e81f5514f 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1061,7 +1061,7 @@ static void slave_read(void *opaque)
                                                           fd[0]);
         break;
     default:
-        error_report("Received unexpected msg type.");
+        error_report("Received unexpected msg type: %d.", hdr.request);
         ret = -EINVAL;
     }
 
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
new file mode 100644
index 0000000000..f4df0a40f6
--- /dev/null
+++ b/include/standard-headers/linux/fuse.h
@@ -0,0 +1,891 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+    This file defines the kernel interface of FUSE
+    Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
+
+    This program can be distributed under the terms of the GNU GPL.
+    See the file COPYING.
+
+    This -- and only this -- header file may also be distributed under
+    the terms of the BSD Licence as follows:
+
+    Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+    SUCH DAMAGE.
+*/
+
+/*
+ * This file defines the kernel interface of FUSE
+ *
+ * Protocol changelog:
+ *
+ * 7.1:
+ *  - add the following messages:
+ *      FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK,
+ *      FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE,
+ *      FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR,
+ *      FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR,
+ *      FUSE_RELEASEDIR
+ *  - add padding to messages to accommodate 32-bit servers on 64-bit kernels
+ *
+ * 7.2:
+ *  - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags
+ *  - add FUSE_FSYNCDIR message
+ *
+ * 7.3:
+ *  - add FUSE_ACCESS message
+ *  - add FUSE_CREATE message
+ *  - add filehandle to fuse_setattr_in
+ *
+ * 7.4:
+ *  - add frsize to fuse_kstatfs
+ *  - clean up request size limit checking
+ *
+ * 7.5:
+ *  - add flags and max_write to fuse_init_out
+ *
+ * 7.6:
+ *  - add max_readahead to fuse_init_in and fuse_init_out
+ *
+ * 7.7:
+ *  - add FUSE_INTERRUPT message
+ *  - add POSIX file lock support
+ *
+ * 7.8:
+ *  - add lock_owner and flags fields to fuse_release_in
+ *  - add FUSE_BMAP message
+ *  - add FUSE_DESTROY message
+ *
+ * 7.9:
+ *  - new fuse_getattr_in input argument of GETATTR
+ *  - add lk_flags in fuse_lk_in
+ *  - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
+ *  - add blksize field to fuse_attr
+ *  - add file flags field to fuse_read_in and fuse_write_in
+ *  - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in
+ *
+ * 7.10
+ *  - add nonseekable open flag
+ *
+ * 7.11
+ *  - add IOCTL message
+ *  - add unsolicited notification support
+ *  - add POLL message and NOTIFY_POLL notification
+ *
+ * 7.12
+ *  - add umask flag to input argument of create, mknod and mkdir
+ *  - add notification messages for invalidation of inodes and
+ *    directory entries
+ *
+ * 7.13
+ *  - make max number of background requests and congestion threshold
+ *    tunables
+ *
+ * 7.14
+ *  - add splice support to fuse device
+ *
+ * 7.15
+ *  - add store notify
+ *  - add retrieve notify
+ *
+ * 7.16
+ *  - add BATCH_FORGET request
+ *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ *  - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ *  - add FUSE_IOCTL_DIR flag
+ *  - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ *  - add FUSE_FALLOCATE
+ *
+ * 7.20
+ *  - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ *  - add FUSE_READDIRPLUS
+ *  - send the requested events in POLL request
+ *
+ * 7.22
+ *  - add FUSE_ASYNC_DIO
+ *
+ * 7.23
+ *  - add FUSE_WRITEBACK_CACHE
+ *  - add time_gran to fuse_init_out
+ *  - add reserved space to fuse_init_out
+ *  - add FATTR_CTIME
+ *  - add ctime and ctimensec to fuse_setattr_in
+ *  - add FUSE_RENAME2 request
+ *  - add FUSE_NO_OPEN_SUPPORT flag
+ *
+ *  7.24
+ *  - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
+ *
+ *  7.25
+ *  - add FUSE_PARALLEL_DIROPS
+ *
+ *  7.26
+ *  - add FUSE_HANDLE_KILLPRIV
+ *  - add FUSE_POSIX_ACL
+ *
+ *  7.27
+ *  - add FUSE_ABORT_ERROR
+ *
+ *  7.28
+ *  - add FUSE_COPY_FILE_RANGE
+ *  - add FOPEN_CACHE_DIR
+ *  - add FUSE_MAX_PAGES, add max_pages to init_out
+ *  - add FUSE_CACHE_SYMLINKS
+ *
+ *  7.29
+ *  - add FUSE_NO_OPENDIR_SUPPORT flag
+ *
+ *  7.30
+ *  - add FUSE_EXPLICIT_INVAL_DATA
+ *  - add FUSE_IOCTL_COMPAT_X32
+ *
+ *  7.31
+ *  - add FUSE_WRITE_KILL_PRIV flag
+ *  - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ *  - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
+ */
+
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
+#include <stdint.h>
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
+
+/** Version number of this interface */
+#define FUSE_KERNEL_VERSION 7
+
+/** Minor version number of this interface */
+#define FUSE_KERNEL_MINOR_VERSION 31
+
+/** The node ID of the root inode */
+#define FUSE_ROOT_ID 1
+
+/* Make sure all structures are padded to 64bit boundary, so 32bit
+   userspace works under 64bit kernels */
+
+struct fuse_attr {
+	uint64_t	ino;
+	uint64_t	size;
+	uint64_t	blocks;
+	uint64_t	atime;
+	uint64_t	mtime;
+	uint64_t	ctime;
+	uint32_t	atimensec;
+	uint32_t	mtimensec;
+	uint32_t	ctimensec;
+	uint32_t	mode;
+	uint32_t	nlink;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint32_t	rdev;
+	uint32_t	blksize;
+	uint32_t	padding;
+};
+
+struct fuse_kstatfs {
+	uint64_t	blocks;
+	uint64_t	bfree;
+	uint64_t	bavail;
+	uint64_t	files;
+	uint64_t	ffree;
+	uint32_t	bsize;
+	uint32_t	namelen;
+	uint32_t	frsize;
+	uint32_t	padding;
+	uint32_t	spare[6];
+};
+
+struct fuse_file_lock {
+	uint64_t	start;
+	uint64_t	end;
+	uint32_t	type;
+	uint32_t	pid; /* tgid */
+};
+
+/**
+ * Bitmasks for fuse_setattr_in.valid
+ */
+#define FATTR_MODE	(1 << 0)
+#define FATTR_UID	(1 << 1)
+#define FATTR_GID	(1 << 2)
+#define FATTR_SIZE	(1 << 3)
+#define FATTR_ATIME	(1 << 4)
+#define FATTR_MTIME	(1 << 5)
+#define FATTR_FH	(1 << 6)
+#define FATTR_ATIME_NOW	(1 << 7)
+#define FATTR_MTIME_NOW	(1 << 8)
+#define FATTR_LOCKOWNER	(1 << 9)
+#define FATTR_CTIME	(1 << 10)
+
+/**
+ * Flags returned by the OPEN request
+ *
+ * FOPEN_DIRECT_IO: bypass page cache for this open file
+ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
+ * FOPEN_CACHE_DIR: allow caching this directory
+ * FOPEN_STREAM: the file is stream-like (no file position at all)
+ */
+#define FOPEN_DIRECT_IO		(1 << 0)
+#define FOPEN_KEEP_CACHE	(1 << 1)
+#define FOPEN_NONSEEKABLE	(1 << 2)
+#define FOPEN_CACHE_DIR		(1 << 3)
+#define FOPEN_STREAM		(1 << 4)
+
+/**
+ * INIT request/reply flags
+ *
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
+ * FUSE_POSIX_ACL: filesystem supports posix acls
+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
+ * FUSE_CACHE_SYMLINKS: cache READLINK responses
+ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid
+ */
+#define FUSE_ASYNC_READ		(1 << 0)
+#define FUSE_POSIX_LOCKS	(1 << 1)
+#define FUSE_FILE_OPS		(1 << 2)
+#define FUSE_ATOMIC_O_TRUNC	(1 << 3)
+#define FUSE_EXPORT_SUPPORT	(1 << 4)
+#define FUSE_BIG_WRITES		(1 << 5)
+#define FUSE_DONT_MASK		(1 << 6)
+#define FUSE_SPLICE_WRITE	(1 << 7)
+#define FUSE_SPLICE_MOVE	(1 << 8)
+#define FUSE_SPLICE_READ	(1 << 9)
+#define FUSE_FLOCK_LOCKS	(1 << 10)
+#define FUSE_HAS_IOCTL_DIR	(1 << 11)
+#define FUSE_AUTO_INVAL_DATA	(1 << 12)
+#define FUSE_DO_READDIRPLUS	(1 << 13)
+#define FUSE_READDIRPLUS_AUTO	(1 << 14)
+#define FUSE_ASYNC_DIO		(1 << 15)
+#define FUSE_WRITEBACK_CACHE	(1 << 16)
+#define FUSE_NO_OPEN_SUPPORT	(1 << 17)
+#define FUSE_PARALLEL_DIROPS    (1 << 18)
+#define FUSE_HANDLE_KILLPRIV	(1 << 19)
+#define FUSE_POSIX_ACL		(1 << 20)
+#define FUSE_ABORT_ERROR	(1 << 21)
+#define FUSE_MAX_PAGES		(1 << 22)
+#define FUSE_CACHE_SYMLINKS	(1 << 23)
+#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT	(1 << 26)
+
+/**
+ * CUSE INIT request/reply flags
+ *
+ * CUSE_UNRESTRICTED_IOCTL:  use unrestricted ioctl
+ */
+#define CUSE_UNRESTRICTED_IOCTL	(1 << 0)
+
+/**
+ * Release flags
+ */
+#define FUSE_RELEASE_FLUSH	(1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK	(1 << 1)
+
+/**
+ * Getattr flags
+ */
+#define FUSE_GETATTR_FH		(1 << 0)
+
+/**
+ * Lock flags
+ */
+#define FUSE_LK_FLOCK		(1 << 0)
+
+/**
+ * WRITE flags
+ *
+ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
+ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
+ */
+#define FUSE_WRITE_CACHE	(1 << 0)
+#define FUSE_WRITE_LOCKOWNER	(1 << 1)
+#define FUSE_WRITE_KILL_PRIV	(1 << 2)
+
+/**
+ * Read flags
+ */
+#define FUSE_READ_LOCKOWNER	(1 << 1)
+
+/**
+ * Ioctl flags
+ *
+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
+ * FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t)
+ *
+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
+ */
+#define FUSE_IOCTL_COMPAT	(1 << 0)
+#define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
+#define FUSE_IOCTL_RETRY	(1 << 2)
+#define FUSE_IOCTL_32BIT	(1 << 3)
+#define FUSE_IOCTL_DIR		(1 << 4)
+#define FUSE_IOCTL_COMPAT_X32	(1 << 5)
+
+#define FUSE_IOCTL_MAX_IOV	256
+
+/**
+ * Poll flags
+ *
+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
+ */
+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
+
+/**
+ * Fsync flags
+ *
+ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata
+ */
+#define FUSE_FSYNC_FDATASYNC	(1 << 0)
+
+enum fuse_opcode {
+	FUSE_LOOKUP		= 1,
+	FUSE_FORGET		= 2,  /* no reply */
+	FUSE_GETATTR		= 3,
+	FUSE_SETATTR		= 4,
+	FUSE_READLINK		= 5,
+	FUSE_SYMLINK		= 6,
+	FUSE_MKNOD		= 8,
+	FUSE_MKDIR		= 9,
+	FUSE_UNLINK		= 10,
+	FUSE_RMDIR		= 11,
+	FUSE_RENAME		= 12,
+	FUSE_LINK		= 13,
+	FUSE_OPEN		= 14,
+	FUSE_READ		= 15,
+	FUSE_WRITE		= 16,
+	FUSE_STATFS		= 17,
+	FUSE_RELEASE		= 18,
+	FUSE_FSYNC		= 20,
+	FUSE_SETXATTR		= 21,
+	FUSE_GETXATTR		= 22,
+	FUSE_LISTXATTR		= 23,
+	FUSE_REMOVEXATTR	= 24,
+	FUSE_FLUSH		= 25,
+	FUSE_INIT		= 26,
+	FUSE_OPENDIR		= 27,
+	FUSE_READDIR		= 28,
+	FUSE_RELEASEDIR		= 29,
+	FUSE_FSYNCDIR		= 30,
+	FUSE_GETLK		= 31,
+	FUSE_SETLK		= 32,
+	FUSE_SETLKW		= 33,
+	FUSE_ACCESS		= 34,
+	FUSE_CREATE		= 35,
+	FUSE_INTERRUPT		= 36,
+	FUSE_BMAP		= 37,
+	FUSE_DESTROY		= 38,
+	FUSE_IOCTL		= 39,
+	FUSE_POLL		= 40,
+	FUSE_NOTIFY_REPLY	= 41,
+	FUSE_BATCH_FORGET	= 42,
+	FUSE_FALLOCATE		= 43,
+	FUSE_READDIRPLUS	= 44,
+	FUSE_RENAME2		= 45,
+	FUSE_LSEEK		= 46,
+	FUSE_COPY_FILE_RANGE	= 47,
+	FUSE_SETUPMAPPING	= 48,
+	FUSE_REMOVEMAPPING	= 49,
+
+	/* CUSE specific operations */
+	CUSE_INIT		= 4096,
+
+	/* Reserved opcodes: helpful to detect structure endian-ness */
+	CUSE_INIT_BSWAP_RESERVED	= 1048576,	/* CUSE_INIT << 8 */
+	FUSE_INIT_BSWAP_RESERVED	= 436207616,	/* FUSE_INIT << 24 */
+};
+
+enum fuse_notify_code {
+	FUSE_NOTIFY_POLL   = 1,
+	FUSE_NOTIFY_INVAL_INODE = 2,
+	FUSE_NOTIFY_INVAL_ENTRY = 3,
+	FUSE_NOTIFY_STORE = 4,
+	FUSE_NOTIFY_RETRIEVE = 5,
+	FUSE_NOTIFY_DELETE = 6,
+	FUSE_NOTIFY_CODE_MAX,
+};
+
+/* The read buffer is required to be at least 8k, but may be much larger */
+#define FUSE_MIN_READ_BUFFER 8192
+
+#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
+
+struct fuse_entry_out {
+	uint64_t	nodeid;		/* Inode ID */
+	uint64_t	generation;	/* Inode generation: nodeid:gen must
+					   be unique for the fs's lifetime */
+	uint64_t	entry_valid;	/* Cache timeout for the name */
+	uint64_t	attr_valid;	/* Cache timeout for the attributes */
+	uint32_t	entry_valid_nsec;
+	uint32_t	attr_valid_nsec;
+	struct fuse_attr attr;
+};
+
+struct fuse_forget_in {
+	uint64_t	nlookup;
+};
+
+struct fuse_forget_one {
+	uint64_t	nodeid;
+	uint64_t	nlookup;
+};
+
+struct fuse_batch_forget_in {
+	uint32_t	count;
+	uint32_t	dummy;
+};
+
+struct fuse_getattr_in {
+	uint32_t	getattr_flags;
+	uint32_t	dummy;
+	uint64_t	fh;
+};
+
+#define FUSE_COMPAT_ATTR_OUT_SIZE 96
+
+struct fuse_attr_out {
+	uint64_t	attr_valid;	/* Cache timeout for the attributes */
+	uint32_t	attr_valid_nsec;
+	uint32_t	dummy;
+	struct fuse_attr attr;
+};
+
+#define FUSE_COMPAT_MKNOD_IN_SIZE 8
+
+struct fuse_mknod_in {
+	uint32_t	mode;
+	uint32_t	rdev;
+	uint32_t	umask;
+	uint32_t	padding;
+};
+
+struct fuse_mkdir_in {
+	uint32_t	mode;
+	uint32_t	umask;
+};
+
+struct fuse_rename_in {
+	uint64_t	newdir;
+};
+
+struct fuse_rename2_in {
+	uint64_t	newdir;
+	uint32_t	flags;
+	uint32_t	padding;
+};
+
+struct fuse_link_in {
+	uint64_t	oldnodeid;
+};
+
+struct fuse_setattr_in {
+	uint32_t	valid;
+	uint32_t	padding;
+	uint64_t	fh;
+	uint64_t	size;
+	uint64_t	lock_owner;
+	uint64_t	atime;
+	uint64_t	mtime;
+	uint64_t	ctime;
+	uint32_t	atimensec;
+	uint32_t	mtimensec;
+	uint32_t	ctimensec;
+	uint32_t	mode;
+	uint32_t	unused4;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint32_t	unused5;
+};
+
+struct fuse_open_in {
+	uint32_t	flags;
+	uint32_t	unused;
+};
+
+struct fuse_create_in {
+	uint32_t	flags;
+	uint32_t	mode;
+	uint32_t	umask;
+	uint32_t	padding;
+};
+
+struct fuse_open_out {
+	uint64_t	fh;
+	uint32_t	open_flags;
+	uint32_t	padding;
+};
+
+struct fuse_release_in {
+	uint64_t	fh;
+	uint32_t	flags;
+	uint32_t	release_flags;
+	uint64_t	lock_owner;
+};
+
+struct fuse_flush_in {
+	uint64_t	fh;
+	uint32_t	unused;
+	uint32_t	padding;
+	uint64_t	lock_owner;
+};
+
+struct fuse_read_in {
+	uint64_t	fh;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	read_flags;
+	uint64_t	lock_owner;
+	uint32_t	flags;
+	uint32_t	padding;
+};
+
+#define FUSE_COMPAT_WRITE_IN_SIZE 24
+
+struct fuse_write_in {
+	uint64_t	fh;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	write_flags;
+	uint64_t	lock_owner;
+	uint32_t	flags;
+	uint32_t	padding;
+};
+
+struct fuse_write_out {
+	uint32_t	size;
+	uint32_t	padding;
+};
+
+#define FUSE_COMPAT_STATFS_SIZE 48
+
+struct fuse_statfs_out {
+	struct fuse_kstatfs st;
+};
+
+struct fuse_fsync_in {
+	uint64_t	fh;
+	uint32_t	fsync_flags;
+	uint32_t	padding;
+};
+
+struct fuse_setxattr_in {
+	uint32_t	size;
+	uint32_t	flags;
+};
+
+struct fuse_getxattr_in {
+	uint32_t	size;
+	uint32_t	padding;
+};
+
+struct fuse_getxattr_out {
+	uint32_t	size;
+	uint32_t	padding;
+};
+
+struct fuse_lk_in {
+	uint64_t	fh;
+	uint64_t	owner;
+	struct fuse_file_lock lk;
+	uint32_t	lk_flags;
+	uint32_t	padding;
+};
+
+struct fuse_lk_out {
+	struct fuse_file_lock lk;
+};
+
+struct fuse_access_in {
+	uint32_t	mask;
+	uint32_t	padding;
+};
+
+struct fuse_init_in {
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	max_readahead;
+	uint32_t	flags;
+};
+
+#define FUSE_COMPAT_INIT_OUT_SIZE 8
+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24
+
+struct fuse_init_out {
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	max_readahead;
+	uint32_t	flags;
+	uint16_t	max_background;
+	uint16_t	congestion_threshold;
+	uint32_t	max_write;
+	uint32_t	time_gran;
+	uint16_t	max_pages;
+	uint16_t	map_alignment;
+	uint32_t	unused[8];
+};
+
+#define CUSE_INIT_INFO_MAX 4096
+
+struct cuse_init_in {
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	unused;
+	uint32_t	flags;
+};
+
+struct cuse_init_out {
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	unused;
+	uint32_t	flags;
+	uint32_t	max_read;
+	uint32_t	max_write;
+	uint32_t	dev_major;		/* chardev major */
+	uint32_t	dev_minor;		/* chardev minor */
+	uint32_t	spare[10];
+};
+
+struct fuse_interrupt_in {
+	uint64_t	unique;
+};
+
+struct fuse_bmap_in {
+	uint64_t	block;
+	uint32_t	blocksize;
+	uint32_t	padding;
+};
+
+struct fuse_bmap_out {
+	uint64_t	block;
+};
+
+struct fuse_ioctl_in {
+	uint64_t	fh;
+	uint32_t	flags;
+	uint32_t	cmd;
+	uint64_t	arg;
+	uint32_t	in_size;
+	uint32_t	out_size;
+};
+
+struct fuse_ioctl_iovec {
+	uint64_t	base;
+	uint64_t	len;
+};
+
+struct fuse_ioctl_out {
+	int32_t		result;
+	uint32_t	flags;
+	uint32_t	in_iovs;
+	uint32_t	out_iovs;
+};
+
+struct fuse_poll_in {
+	uint64_t	fh;
+	uint64_t	kh;
+	uint32_t	flags;
+	uint32_t	events;
+};
+
+struct fuse_poll_out {
+	uint32_t	revents;
+	uint32_t	padding;
+};
+
+struct fuse_notify_poll_wakeup_out {
+	uint64_t	kh;
+};
+
+struct fuse_fallocate_in {
+	uint64_t	fh;
+	uint64_t	offset;
+	uint64_t	length;
+	uint32_t	mode;
+	uint32_t	padding;
+};
+
+struct fuse_in_header {
+	uint32_t	len;
+	uint32_t	opcode;
+	uint64_t	unique;
+	uint64_t	nodeid;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint32_t	pid;
+	uint32_t	padding;
+};
+
+struct fuse_out_header {
+	uint32_t	len;
+	int32_t		error;
+	uint64_t	unique;
+};
+
+struct fuse_dirent {
+	uint64_t	ino;
+	uint64_t	off;
+	uint32_t	namelen;
+	uint32_t	type;
+	char name[];
+};
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+	(((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+#define FUSE_DIRENT_SIZE(d) \
+	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+struct fuse_direntplus {
+	struct fuse_entry_out entry_out;
+	struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+	offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
+struct fuse_notify_inval_inode_out {
+	uint64_t	ino;
+	int64_t		off;
+	int64_t		len;
+};
+
+struct fuse_notify_inval_entry_out {
+	uint64_t	parent;
+	uint32_t	namelen;
+	uint32_t	padding;
+};
+
+struct fuse_notify_delete_out {
+	uint64_t	parent;
+	uint64_t	child;
+	uint32_t	namelen;
+	uint32_t	padding;
+};
+
+struct fuse_notify_store_out {
+	uint64_t	nodeid;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	padding;
+};
+
+struct fuse_notify_retrieve_out {
+	uint64_t	notify_unique;
+	uint64_t	nodeid;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+	uint64_t	dummy1;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	dummy2;
+	uint64_t	dummy3;
+	uint64_t	dummy4;
+};
+
+/* Device ioctls: */
+#define FUSE_DEV_IOC_CLONE	_IOR(229, 0, uint32_t)
+
+struct fuse_lseek_in {
+	uint64_t	fh;
+	uint64_t	offset;
+	uint32_t	whence;
+	uint32_t	padding;
+};
+
+struct fuse_lseek_out {
+	uint64_t	offset;
+};
+
+struct fuse_copy_file_range_in {
+	uint64_t	fh_in;
+	uint64_t	off_in;
+	uint64_t	nodeid_out;
+	uint64_t	fh_out;
+	uint64_t	off_out;
+	uint64_t	len;
+	uint64_t	flags;
+};
+
+#endif /* _LINUX_FUSE_H */
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index f76d77363b..29c27f4681 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux"
 mkdir -p "$output/include/standard-headers/linux"
 for i in "$tmpdir"/include/linux/*virtio*.h \
          "$tmpdir/include/linux/qemu_fw_cfg.h" \
+         "$tmpdir/include/linux/fuse.h" \
          "$tmpdir/include/linux/input.h" \
          "$tmpdir/include/linux/input-event-codes.h" \
          "$tmpdir/include/linux/pci_regs.h" \
diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in
new file mode 100644
index 0000000000..9bcd86f8dc
--- /dev/null
+++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in
@@ -0,0 +1,5 @@
+{
+  "description": "QEMU virtiofsd vhost-user-fs",
+  "type": "fs",
+  "binary": "@libexecdir@/virtiofsd"
+}
diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
new file mode 100644
index 0000000000..076f667e46
--- /dev/null
+++ b/tools/virtiofsd/Makefile.objs
@@ -0,0 +1,12 @@
+virtiofsd-obj-y = buffer.o \
+                  fuse_opt.o \
+                  fuse_log.o \
+                  fuse_lowlevel.o \
+                  fuse_signals.o \
+                  fuse_virtio.o \
+                  helper.o \
+                  passthrough_ll.o \
+                  seccomp.o
+# Per-object flags: only seccomp.o gets the SECCOMP_* cflags and libs
+seccomp.o-cflags := $(SECCOMP_CFLAGS)
+seccomp.o-libs := $(SECCOMP_LIBS)
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
new file mode 100644
index 0000000000..27c1377f22
--- /dev/null
+++ b/tools/virtiofsd/buffer.c
@@ -0,0 +1,351 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2010  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * Functions for dealing with `struct fuse_buf` and `struct
+ * fuse_bufvec`.
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "qemu/osdep.h"
+#include "fuse_i.h"
+#include "fuse_lowlevel.h"
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Total size of the buffers in @bufv, saturating at SIZE_MAX: an entry of
+ * size SIZE_MAX or an overflowing sum yields SIZE_MAX, not a wrapped sum.
+ */
+size_t fuse_buf_size(const struct fuse_bufvec *bufv)
+{
+    size_t i, size = 0;
+    for (i = 0; i < bufv->count; i++) {
+        if (bufv->buf[i].size >= SIZE_MAX - size) {
+            return SIZE_MAX;
+        }
+        size += bufv->buf[i].size;
+    }
+    return size;
+}
+
+/*
+ * Write every non-empty buffer of @in_buf to out_buf->fd with one vectored
+ * write: pwritev() at out_buf->pos when FUSE_BUF_FD_SEEK is set, writev()
+ * otherwise.  in_buf->idx and in_buf->off are not consulted.
+ * Returns the number of bytes written, or a negative errno value.
+ */
+static ssize_t fuse_buf_writev(struct fuse_buf *out_buf,
+                               struct fuse_bufvec *in_buf)
+{
+    ssize_t res;
+    size_t i, used = 0;
+    struct iovec *iov = calloc(in_buf->count, sizeof(struct iovec));
+
+    if (!iov) {
+        return -ENOMEM;
+    }
+    for (i = 0; i < in_buf->count; i++) {
+        if (in_buf->buf[i].size) {
+            iov[used].iov_base = in_buf->buf[i].mem;
+            iov[used].iov_len = in_buf->buf[i].size;
+            used++;
+        }
+    }
+    /* Hand over only the filled entries, not the slots of skipped buffers */
+    if (out_buf->flags & FUSE_BUF_FD_SEEK) {
+        res = pwritev(out_buf->fd, iov, used, out_buf->pos);
+    } else {
+        res = writev(out_buf->fd, iov, used);
+    }
+    if (res == -1) {
+        res = -errno;
+    }
+    free(iov);
+    return res;
+}
+
+static size_t min_size(size_t s1, size_t s2)
+{
+    return (s2 < s1) ? s2 : s1;  /* ties yield the (equal) first argument */
+}
+
+/*
+ * Write @len bytes from memory buffer @src to the fd of @dst; short writes
+ * are retried only with FUSE_BUF_FD_RETRY.  Returns bytes written or -errno.
+ */
+static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off,
+                              const struct fuse_buf *src, size_t src_off,
+                              size_t len)
+{
+    const int seek = (dst->flags & FUSE_BUF_FD_SEEK) != 0;
+    const int retry = (dst->flags & FUSE_BUF_FD_RETRY) != 0;
+    size_t done = 0;
+
+    while (len) {
+        const char *from = (char *)src->mem + src_off;
+        ssize_t n;
+        if (seek) {
+            n = pwrite(dst->fd, from, len, dst->pos + dst_off);
+        } else {
+            n = write(dst->fd, from, len);
+        }
+        if (n == -1 && !done) {
+            return -errno;
+        }
+        if (n == -1 || n == 0) {
+            break;
+        }
+        done += n;
+        if (!retry) {
+            break;
+        }
+        src_off += n;
+        dst_off += n;
+        len -= n;
+    }
+    return done;
+}
+
+/*
+ * Read up to @len bytes from the fd of @src into memory buffer @dst; short
+ * reads are retried only with FUSE_BUF_FD_RETRY.  Returns bytes or -errno.
+ */
+static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off,
+                             const struct fuse_buf *src, size_t src_off,
+                             size_t len)
+{
+    const int seek = (src->flags & FUSE_BUF_FD_SEEK) != 0;
+    const int retry = (src->flags & FUSE_BUF_FD_RETRY) != 0;
+    size_t done = 0;
+
+    while (len) {
+        char *to = (char *)dst->mem + dst_off;
+        ssize_t n;
+        if (seek) {
+            n = pread(src->fd, to, len, src->pos + src_off);
+        } else {
+            n = read(src->fd, to, len);
+        }
+        if (n == -1 && !done) {
+            return -errno;
+        }
+        if (n == -1 || n == 0) {
+            break;
+        }
+        done += n;
+        if (!retry) {
+            break;
+        }
+        dst_off += n;
+        src_off += n;
+        len -= n;
+    }
+    return done;
+}
+
+/*
+ * Copy up to @len bytes from the fd of @src to the fd of @dst, staging the
+ * data in a 4096-byte buffer on the stack.  The loop ends at EOF, on an
+ * error, or when a round moves fewer bytes than requested.
+ *
+ * Returns the number of bytes copied, or the negative error of the first
+ * read/write if nothing had been copied yet.
+ */
+static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off,
+                                 const struct fuse_buf *src, size_t src_off,
+                                 size_t len)
+{
+    char bounce[4096];
+    struct fuse_buf tmp = {
+        .mem = bounce,
+        .size = sizeof(bounce),
+        .flags = 0,
+    };
+    size_t total = 0;
+
+    while (len) {
+        const size_t chunk = min_size(tmp.size, len);
+        ssize_t got, put;
+
+        /* fill the staging buffer from the source fd */
+        got = fuse_buf_read(&tmp, 0, src, src_off, chunk);
+        if (got < 0 && !total) {
+            return got;
+        }
+        if (got <= 0) {
+            break;
+        }
+
+        /* drain what was read into the destination fd */
+        put = fuse_buf_write(dst, dst_off, &tmp, 0, got);
+        if (put < 0 && !total) {
+            return put;
+        }
+        if (put <= 0) {
+            break;
+        }
+        total += put;
+
+        /* a short round ends the copy */
+        if (put < chunk) {
+            break;
+        }
+        dst_off += put;
+        src_off += put;
+        len -= put;
+    }
+
+    return total;
+}
+
+/*
+ * Copy @len bytes from @src to @dst, choosing the primitive by whether each
+ * side is fd-backed (FUSE_BUF_IS_FD) or memory (memmove() if ranges overlap).
+ */
+static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off,
+                                 const struct fuse_buf *src, size_t src_off,
+                                 size_t len)
+{
+    const int src_fd = src->flags & FUSE_BUF_IS_FD;
+    const int dst_fd = dst->flags & FUSE_BUF_IS_FD;
+    char *to, *from;
+
+    if (src_fd && dst_fd) {
+        return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len);
+    } else if (src_fd) {
+        return fuse_buf_read(dst, dst_off, src, src_off, len);
+    } else if (dst_fd) {
+        return fuse_buf_write(dst, dst_off, src, src_off, len);
+    }
+    to = (char *)dst->mem + dst_off;
+    from = (char *)src->mem + src_off;
+    if (to != from && (to + len <= from || from + len <= to)) {
+        memcpy(to, from, len);
+    } else if (to != from) {
+        memmove(to, from, len);
+    }
+    return len;
+}
+
+/* Return the buffer at the vector's current index, or NULL once exhausted. */
+static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv)
+{
+    if (bufv->idx >= bufv->count) {
+        return NULL;
+    }
+    return &bufv->buf[bufv->idx];
+}
+
+/* Consume @len bytes of @bufv; returns 0 once the last buffer is used up. */
+static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len)
+{
+    const struct fuse_buf *buf = fuse_bufvec_current(bufv);
+    assert(buf != NULL); /* must hold before buf is dereferenced below */
+    bufv->off += len;
+    assert(bufv->off <= buf->size);
+    if (bufv->off == buf->size) {
+        bufv->idx++;
+        if (bufv->idx == bufv->count) {
+            return 0;
+        }
+        bufv->off = 0;
+    }
+    return 1;
+}
+
+/*
+ * Copy data from @srcv into @dstv, each starting at its current position
+ * (idx/off).  Returns the number of bytes copied, or a negative errno value
+ * if nothing could be copied.
+ */
+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
+{
+    size_t copied = 0, i;
+
+    if (dstv == srcv) {
+        return fuse_buf_size(dstv);
+    }
+
+    /*
+     * Use writev to improve bandwidth when all the src buffers are already
+     * mapped by the daemon process.  fuse_buf_writev() always starts at the
+     * first src buffer, so srcv must not have been partially consumed.
+     */
+    for (i = 0; i < srcv->count; i++) {
+        if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
+            break;
+        }
+    }
+    if ((i == srcv->count) && (srcv->idx == 0) && (srcv->off == 0) &&
+        (dstv->count == 1) && (dstv->idx == 0) &&
+        (dstv->buf[0].flags & FUSE_BUF_IS_FD)) {
+        /* Work on a copy so dstv->off is not folded into pos permanently */
+        struct fuse_buf out = dstv->buf[0];
+
+        out.pos += dstv->off;
+        return fuse_buf_writev(&out, srcv);
+    }
+
+    for (;;) {
+        const struct fuse_buf *src = fuse_bufvec_current(srcv);
+        const struct fuse_buf *dst = fuse_bufvec_current(dstv);
+        size_t len;
+        ssize_t res;
+
+        if (src == NULL || dst == NULL) {
+            break;
+        }
+        len = min_size(src->size - srcv->off, dst->size - dstv->off);
+        res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len);
+        if (res < 0) {
+            if (!copied) {
+                return res;
+            }
+            break;
+        }
+        copied += res;
+        if (!fuse_bufvec_advance(srcv, res) ||
+            !fuse_bufvec_advance(dstv, res)) {
+            break;
+        }
+        if (res < len) {
+            break;
+        }
+    }
+
+    return copied;
+}
+
+/* Return the next @len bytes of @iter and step past them; NULL if too few. */
+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len)
+{
+    const size_t start = iter->pos;
+    const size_t avail = iter->size - start;
+
+    if (avail < len) {
+        return NULL;
+    }
+    iter->pos = start + len;
+    return iter->mem + start;
+}
+
+/*
+ * Return the string at the iterator position and step past its NUL
+ * terminator; NULL (position unchanged) when no NUL byte remains.
+ */
+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter)
+{
+    const char *str = iter->mem + iter->pos;
+    const char *nul = memchr(str, '\0', iter->size - iter->pos);
+    if (!nul) {
+        return NULL;
+    }
+    iter->pos += (nul - str) + 1;
+    return str;
+}
diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h
new file mode 100644
index 0000000000..7a4c713559
--- /dev/null
+++ b/tools/virtiofsd/fuse.h
@@ -0,0 +1,1249 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+#ifndef FUSE_H_
+#define FUSE_H_
+
+/*
+ *
+ * This file defines the library interface of FUSE
+ *
+ * IMPORTANT: you should define FUSE_USE_VERSION before including this header.
+ */
+
+#include "fuse_common.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <time.h>
+
+/*
+ * Basic FUSE API
+ */
+
+/** Handle for a FUSE filesystem */
+struct fuse;
+
+/**
+ * Readdir flags, passed to ->readdir()
+ */
+enum fuse_readdir_flags {
+    /**
+     * "Plus" mode.
+     *
+     * The kernel wants to prefill the inode cache during readdir.  The
+     * filesystem may honour this by filling in the attributes and setting
+     * FUSE_FILL_DIR_PLUS for the filler function.  The filesystem may also
+     * just ignore this flag completely.
+     */
+    FUSE_READDIR_PLUS = (1 << 0),
+};
+
+enum fuse_fill_dir_flags {
+    /**
+     * "Plus" mode: all file attributes are valid
+     *
+     * The attributes are used by the kernel to prefill the inode cache
+     * during a readdir.
+     *
+     * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set
+     * and vice versa.
+     */
+    FUSE_FILL_DIR_PLUS = (1 << 1), /* bit 1; this enum defines no bit 0 flag */
+};
+
+/**
+ * Function to add an entry in a readdir() operation
+ *
+ * The *off* parameter can be any non-zero value that enables the
+ * filesystem to identify the current point in the directory
+ * stream. It does not need to be the actual physical position. A
+ * value of zero is reserved to indicate that seeking in directories
+ * is not supported.
+ *
+ * @param buf the buffer passed to the readdir() operation
+ * @param name the file name of the directory entry
+ * @param stbuf file attributes, can be NULL
+ * @param off offset of the next entry or zero
+ * @param flags fill flags
+ * @return 1 if buffer is full, zero otherwise
+ */
+typedef int (*fuse_fill_dir_t)(void *buf, const char *name,
+                               const struct stat *stbuf, off_t off,
+                               enum fuse_fill_dir_flags flags);
+/**
+ * Configuration of the high-level API
+ *
+ * This structure is initialized from the arguments passed to
+ * fuse_new(), and then passed to the file system's init() handler
+ * which should ensure that the configuration is compatible with the
+ * file system implementation.
+ */
+struct fuse_config {
+    /**
+     * If `set_gid` is non-zero, the st_gid attribute of each file
+     * is overwritten with the value of `gid`.
+     */
+    int set_gid;
+    unsigned int gid;
+
+    /**
+     * If `set_uid` is non-zero, the st_uid attribute of each file
+     * is overwritten with the value of `uid`.
+     */
+    int set_uid;
+    unsigned int uid;
+
+    /**
+     * If `set_mode` is non-zero, any permission bits set in
+     * `umask` are unset in the st_mode attribute of each file.
+     */
+    int set_mode;
+    unsigned int umask;
+
+    /**
+     * The timeout in seconds for which name lookups will be
+     * cached.
+     */
+    double entry_timeout;
+
+    /**
+     * The timeout in seconds for which a negative lookup will be
+     * cached. This means, that if file did not exist (lookup
+     * returned ENOENT), the lookup will only be redone after the
+     * timeout, and the file/directory will be assumed to not
+     * exist until then. A value of zero means that negative
+     * lookups are not cached.
+     */
+    double negative_timeout;
+
+    /**
+     * The timeout in seconds for which file/directory attributes
+     * (as returned by e.g. the `getattr` handler) are cached.
+     */
+    double attr_timeout;
+
+    /**
+     * Allow requests to be interrupted
+     */
+    int intr;
+
+    /**
+     * Specify which signal number to send to the filesystem when
+     * a request is interrupted.  The default is hardcoded to
+     * USR1.
+     */
+    int intr_signal;
+
+    /**
+     * Normally, FUSE assigns inodes to paths only for as long as
+     * the kernel is aware of them. With this option inodes are
+     * instead remembered for at least this many seconds.  This
+     * will require more memory, but may be necessary when using
+     * applications that make use of inode numbers.
+     *
+     * A number of -1 means that inodes will be remembered for the
+     * entire life-time of the file-system process.
+     */
+    int remember;
+
+    /**
+     * The default behavior is that if an open file is deleted,
+     * the file is renamed to a hidden file (.fuse_hiddenXXX), and
+     * only removed when the file is finally released.  This
+     * relieves the filesystem implementation of having to deal
+     * with this problem. This option disables the hiding
+     * behavior, and files are removed immediately in an unlink
+     * operation (or in a rename operation which overwrites an
+     * existing file).
+     *
+     * It is recommended that you not use the hard_remove
+     * option. When hard_remove is set, the following libc
+     * functions fail on unlinked files (returning errno of
+     * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2),
+     * ftruncate(2), fstat(2), fchmod(2), fchown(2)
+     */
+    int hard_remove;
+
+    /**
+     * Honor the st_ino field in the functions getattr() and
+     * fill_dir(). This value is used to fill in the st_ino field
+     * in the stat(2), lstat(2), fstat(2) functions and the d_ino
+     * field in the readdir(2) function. The filesystem does not
+     * have to guarantee uniqueness, however some applications
+     * rely on this value being unique for the whole filesystem.
+     *
+     * Note that this does *not* affect the inode that libfuse
+     * and the kernel use internally (also called the "nodeid").
+     */
+    int use_ino;
+
+    /**
+     * If use_ino option is not given, still try to fill in the
+     * d_ino field in readdir(2). If the name was previously
+     * looked up, and is still in the cache, the inode number
+     * found there will be used.  Otherwise it will be set to -1.
+     * If use_ino option is given, this option is ignored.
+     */
+    int readdir_ino;
+
+    /**
+     * This option disables the use of page cache (file content cache)
+     * in the kernel for this filesystem. This has several effects:
+     *
+     * 1. Each read(2) or write(2) system call will initiate one
+     *    or more read or write operations, data will not be
+     *    cached in the kernel.
+     *
+     * 2. The return value of the read() and write() system calls
+     *    will correspond to the return values of the read and
+     *    write operations. This is useful for example if the
+     *    file size is not known in advance (before reading it).
+     *
+     * Internally, enabling this option causes fuse to set the
+     * `direct_io` field of `struct fuse_file_info` - overwriting
+     * any value that was put there by the file system.
+     */
+    int direct_io;
+
+    /**
+     * This option disables flushing the cache of the file
+     * contents on every open(2).  This should only be enabled on
+     * filesystems where the file data is never changed
+     * externally (not through the mounted FUSE filesystem).  Thus
+     * it is not suitable for network filesystems and other
+     * intermediate filesystems.
+     *
+     * NOTE: if this option is not specified (and neither
+     * direct_io) data is still cached after the open(2), so a
+     * read(2) system call will not always initiate a read
+     * operation.
+     *
+     * Internally, enabling this option causes fuse to set the
+     * `keep_cache` field of `struct fuse_file_info` - overwriting
+     * any value that was put there by the file system.
+     */
+    int kernel_cache;
+
+    /**
+     * This option is an alternative to `kernel_cache`. Instead of
+     * unconditionally keeping cached data, the cached data is
+     * invalidated on open(2) if the modification time or the
+     * size of the file has changed since it was last opened.
+     */
+    int auto_cache;
+
+    /**
+     * The timeout in seconds for which file attributes are cached
+     * for the purpose of checking if auto_cache should flush the
+     * file data on open.
+     */
+    int ac_attr_timeout_set;
+    double ac_attr_timeout;
+
+    /**
+     * If this option is given the file-system handlers for the
+     * following operations will not receive path information:
+     * read, write, flush, release, fsync, readdir, releasedir,
+     * fsyncdir, lock, ioctl and poll.
+     *
+     * For the truncate, getattr, chmod, chown and utimens
+     * operations the path will be provided only if the struct
+     * fuse_file_info argument is NULL.
+     */
+    int nullpath_ok;
+
+    /**
+     * The remaining options are used by libfuse internally and
+     * should not be touched.
+     */
+    int show_help;
+    char *modules;
+    int debug;
+};
+
+
+/**
+ * The file system operations:
+ *
+ * Most of these should work very similarly to the well known UNIX
+ * file system operations.  A major exception is that instead of
+ * returning an error in 'errno', the operation should return the
+ * negated error value (-errno) directly.
+ *
+ * All methods are optional, but some are essential for a useful
+ * filesystem (e.g. getattr).  Open, flush, release, fsync, opendir,
+ * releasedir, fsyncdir, access, create, truncate, lock, init and
+ * destroy are special purpose methods, without which a full featured
+ * filesystem can still be implemented.
+ *
+ * In general, all methods are expected to perform any necessary
+ * permission checking. However, a filesystem may delegate this task
+ * to the kernel by passing the `default_permissions` mount option to
+ * `fuse_new()`. In this case, methods will only be called if
+ * the kernel's permission check has succeeded.
+ *
+ * Almost all operations take a path which can be of any length.
+ */
+struct fuse_operations {
+    /**
+     * Get file attributes.
+     *
+     * Similar to stat().  The 'st_dev' and 'st_blksize' fields are
+     * ignored. The 'st_ino' field is ignored except if the 'use_ino'
+     * mount option is given. In that case it is passed to userspace,
+     * but libfuse and the kernel will still assign a different
+     * inode for internal use (called the "nodeid").
+     *
+     * `fi` will always be NULL if the file is not currently open, but
+     * may also be NULL if the file is open.
+     */
+    int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi);
+
+    /**
+     * Read the target of a symbolic link
+     *
+     * The buffer should be filled with a null terminated string.  The
+     * buffer size argument includes the space for the terminating
+     * null character. If the linkname is too long to fit in the
+     * buffer, it should be truncated. The return value should be 0
+     * for success.
+     */
+    int (*readlink)(const char *, char *, size_t);
+
+    /**
+     * Create a file node
+     *
+     * This is called for creation of all non-directory, non-symlink
+     * nodes.  If the filesystem defines a create() method, then for
+     * regular files that will be called instead.
+     */
+    int (*mknod)(const char *, mode_t, dev_t);
+
+    /**
+     * Create a directory
+     *
+     * Note that the mode argument may not have the type specification
+     * bits set, i.e. S_ISDIR(mode) can be false.  To obtain the
+     * correct directory type bits use  mode|S_IFDIR
+     */
+    int (*mkdir)(const char *, mode_t);
+
+    /** Remove a file */
+    int (*unlink)(const char *);
+
+    /** Remove a directory */
+    int (*rmdir)(const char *);
+
+    /** Create a symbolic link */
+    int (*symlink)(const char *, const char *);
+
+    /**
+     * Rename a file
+     *
+     * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If
+     * RENAME_NOREPLACE is specified, the filesystem must not
+     * overwrite *newname* if it exists and return an error
+     * instead. If `RENAME_EXCHANGE` is specified, the filesystem
+     * must atomically exchange the two files, i.e. both must
+     * exist and neither may be deleted.
+     */
+    int (*rename)(const char *, const char *, unsigned int flags);
+
+    /** Create a hard link to a file */
+    int (*link)(const char *, const char *);
+
+    /**
+     * Change the permission bits of a file
+     *
+     * `fi` will always be NULL if the file is not currently open, but
+     * may also be NULL if the file is open.
+     */
+    int (*chmod)(const char *, mode_t, struct fuse_file_info *fi);
+
+    /**
+     * Change the owner and group of a file
+     *
+     * `fi` will always be NULL if the file is not currently open, but
+     * may also be NULL if the file is open.
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits.
+     */
+    int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi);
+
+    /**
+     * Change the size of a file
+     *
+     * `fi` will always be NULL if the file is not currently open, but
+     * may also be NULL if the file is open.
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits.
+     */
+    int (*truncate)(const char *, off_t, struct fuse_file_info *fi);
+
+    /**
+     * Open a file
+     *
+     * Open flags are available in fi->flags. The following rules
+     * apply.
+     *
+     *  - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be
+     *    filtered out / handled by the kernel.
+     *
+     *  - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH)
+     *    should be used by the filesystem to check if the operation is
+     *    permitted.  If the ``-o default_permissions`` mount option is
+     *    given, this check is already done by the kernel before calling
+     *    open() and may thus be omitted by the filesystem.
+     *
+     *  - When writeback caching is enabled, the kernel may send
+     *    read requests even for files opened with O_WRONLY. The
+     *    filesystem should be prepared to handle this.
+     *
+     *  - When writeback caching is disabled, the filesystem is
+     *    expected to properly handle the O_APPEND flag and ensure
+     *    that each write is appending to the end of the file.
+     *
+     *  - When writeback caching is enabled, the kernel will
+     *    handle O_APPEND. However, unless all changes to the file
+     *    come through the kernel this will not work reliably. The
+     *    filesystem should thus either ignore the O_APPEND flag
+     *    (and let the kernel handle it), or return an error
+     *    (indicating that reliably O_APPEND is not available).
+     *
+     * Filesystem may store an arbitrary file handle (pointer,
+     * index, etc) in fi->fh, and use this in all other file
+     * operations (read, write, flush, release, fsync).
+     *
+     * Filesystem may also implement stateless file I/O and not store
+     * anything in fi->fh.
+     *
+     * There are also some flags (direct_io, keep_cache) which the
+     * filesystem may set in fi, to change the way the file is opened.
+     * See fuse_file_info structure in <fuse_common.h> for more details.
+     *
+     * If this request is answered with an error code of ENOSYS
+     * and FUSE_CAP_NO_OPEN_SUPPORT is set in
+     * `fuse_conn_info.capable`, this is treated as success and
+     * future calls to open will also succeed without being sent
+     * to the filesystem process.
+     *
+     */
+    int (*open)(const char *, struct fuse_file_info *);
+
+    /**
+     * Read data from an open file
+     *
+     * Read should return exactly the number of bytes requested except
+     * on EOF or error, otherwise the rest of the data will be
+     * substituted with zeroes.  An exception to this is when the
+     * 'direct_io' mount option is specified, in which case the return
+     * value of the read system call will reflect the return value of
+     * this operation.
+     */
+    int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *);
+
+    /**
+     * Write data to an open file
+     *
+     * Write should return exactly the number of bytes requested
+     * except on error.  An exception to this is when the 'direct_io'
+     * mount option is specified (see read operation).
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits.
+     */
+    int (*write)(const char *, const char *, size_t, off_t,
+                 struct fuse_file_info *);
+
+    /**
+     * Get file system statistics
+     *
+     * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored
+     */
+    int (*statfs)(const char *, struct statvfs *);
+
+    /**
+     * Possibly flush cached data
+     *
+     * BIG NOTE: This is not equivalent to fsync().  It's not a
+     * request to sync dirty data.
+     *
+     * Flush is called on each close() of a file descriptor, as opposed to
+     * release which is called on the close of the last file descriptor for
+     * a file.  Under Linux, errors returned by flush() will be passed to
+     * userspace as errors from close(), so flush() is a good place to write
+     * back any cached dirty data. However, many applications ignore errors
+     * on close(), and on non-Linux systems, close() may succeed even if flush()
+     * returns an error. For these reasons, filesystems should not assume
+     * that errors returned by flush will ever be noticed or even
+     * delivered.
+     *
+     * NOTE: The flush() method may be called more than once for each
+     * open().  This happens if more than one file descriptor refers to an
+     * open file handle, e.g. due to dup(), dup2() or fork() calls.  It is
+     * not possible to determine if a flush is final, so each flush should
+     * be treated equally.  Multiple write-flush sequences are relatively
+     * rare, so this shouldn't be a problem.
+     *
+     * Filesystems shouldn't assume that flush will be called at any
+     * particular point.  It may be called more times than expected, or not
+     * at all.
+     *
+     * [close]:
+     * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
+     */
+    int (*flush)(const char *, struct fuse_file_info *);
+
+    /**
+     * Release an open file
+     *
+     * Release is called when there are no more references to an open
+     * file: all file descriptors are closed and all memory mappings
+     * are unmapped.
+     *
+     * For every open() call there will be exactly one release() call
+     * with the same flags and file handle.  It is possible to
+     * have a file opened more than once, in which case only the last
+     * release will mean, that no more reads/writes will happen on the
+     * file.  The return value of release is ignored.
+     */
+    int (*release)(const char *, struct fuse_file_info *);
+
+    /*
+     * Synchronize file contents
+     *
+     * If the datasync parameter is non-zero, then only the user data
+     * should be flushed, not the meta data.
+     */
+    int (*fsync)(const char *, int, struct fuse_file_info *);
+
+    /** Set extended attributes */
+    int (*setxattr)(const char *, const char *, const char *, size_t, int);
+
+    /** Get extended attributes */
+    int (*getxattr)(const char *, const char *, char *, size_t);
+
+    /** List extended attributes */
+    int (*listxattr)(const char *, char *, size_t);
+
+    /** Remove extended attributes */
+    int (*removexattr)(const char *, const char *);
+
+    /*
+     * Open directory
+     *
+     * Unless the 'default_permissions' mount option is given,
+     * this method should check if opendir is permitted for this
+     * directory. Optionally opendir may also return an arbitrary
+     * filehandle in the fuse_file_info structure, which will be
+     * passed to readdir, releasedir and fsyncdir.
+     */
+    int (*opendir)(const char *, struct fuse_file_info *);
+
+    /*
+     * Read directory
+     *
+     * The filesystem may choose between two modes of operation:
+     *
+     * 1) The readdir implementation ignores the offset parameter, and
+     * passes zero to the filler function's offset.  The filler
+     * function will not return '1' (unless an error happens), so the
+     * whole directory is read in a single readdir operation.
+     *
+     * 2) The readdir implementation keeps track of the offsets of the
+     * directory entries.  It uses the offset parameter and always
+     * passes non-zero offset to the filler function.  When the buffer
+     * is full (or an error happens) the filler function will return
+     * '1'.
+     */
+    int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t,
+                   struct fuse_file_info *, enum fuse_readdir_flags);
+
+    /**
+     *  Release directory
+     */
+    int (*releasedir)(const char *, struct fuse_file_info *);
+
+    /**
+     * Synchronize directory contents
+     *
+     * If the datasync parameter is non-zero, then only the user data
+     * should be flushed, not the meta data
+     */
+    int (*fsyncdir)(const char *, int, struct fuse_file_info *);
+
+    /**
+     * Initialize filesystem
+     *
+     * The return value will be passed in the `private_data` field of
+     * `struct fuse_context` to all file operations, and as a
+     * parameter to the destroy() method. It overrides the initial
+     * value provided to fuse_main() / fuse_new().
+     */
+    void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg);
+
+    /**
+     * Clean up filesystem
+     *
+     * Called on filesystem exit.
+     */
+    void (*destroy)(void *private_data);
+
+    /**
+     * Check file access permissions
+     *
+     * This will be called for the access() system call.  If the
+     * 'default_permissions' mount option is given, this method is not
+     * called.
+     *
+     * This method is not called under Linux kernel versions 2.4.x
+     */
+    int (*access)(const char *, int);
+
+    /**
+     * Create and open a file
+     *
+     * If the file does not exist, first create it with the specified
+     * mode, and then open it.
+     *
+     * If this method is not implemented or under Linux kernel
+     * versions earlier than 2.6.15, the mknod() and open() methods
+     * will be called instead.
+     */
+    int (*create)(const char *, mode_t, struct fuse_file_info *);
+
+    /**
+     * Perform POSIX file locking operation
+     *
+     * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW.
+     *
+     * For the meaning of fields in 'struct flock' see the man page
+     * for fcntl(2).  The l_whence field will always be set to
+     * SEEK_SET.
+     *
+     * For checking lock ownership, the 'fuse_file_info->owner'
+     * argument must be used.
+     *
+     * For F_GETLK operation, the library will first check currently
+     * held locks, and if a conflicting lock is found it will return
+     * information without calling this method.  This ensures that
+     * for local locks the l_pid field is correctly filled in. The
+     * results may not be accurate in case of race conditions and in
+     * the presence of hard links, but it's unlikely that an
+     * application would rely on accurate GETLK results in these
+     * cases.  If a conflicting lock is not found, this method will be
+     * called, and the filesystem may fill out l_pid by a meaningful
+     * value, or it may leave this field zero.
+     *
+     * For F_SETLK and F_SETLKW the l_pid field will be set to the pid
+     * of the process performing the locking operation.
+     *
+     * Note: if this method is not implemented, the kernel will still
+     * allow file locking to work locally.  Hence it is only
+     * interesting for network filesystems and similar.
+     */
+    int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *);
+
+    /**
+     * Change the access and modification times of a file with
+     * nanosecond resolution
+     *
+     * This supersedes the old utime() interface.  New applications
+     * should use this.
+     *
+     * `fi` will always be NULL if the file is not currently open, but
+     * may also be NULL if the file is open.
+     *
+     * See the utimensat(2) man page for details.
+     */
+    int (*utimens)(const char *, const struct timespec tv[2],
+                   struct fuse_file_info *fi);
+
+    /**
+     * Map block index within file to block index within device
+     *
+     * Note: This makes sense only for block device backed filesystems
+     * mounted with the 'blkdev' option
+     */
+    int (*bmap)(const char *, size_t blocksize, uint64_t *idx);
+
+    /**
+     * Ioctl
+     *
+     * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in
+     * 64bit environment.  The size and direction of data is
+     * determined by _IOC_*() decoding of cmd.  For _IOC_NONE,
+     * data will be NULL, for _IOC_WRITE data is out area, for
+     * _IOC_READ in area and if both are set in/out area.  In all
+     * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes.
+     *
+     * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a
+     * directory file handle.
+     *
+     * Note : the unsigned long request submitted by the application
+     * is truncated to 32 bits.
+     */
+    int (*ioctl)(const char *, unsigned int cmd, void *arg,
+                 struct fuse_file_info *, unsigned int flags, void *data);
+
+    /**
+     * Poll for IO readiness events
+     *
+     * Note: If ph is non-NULL, the client should notify
+     * when IO readiness events occur by calling
+     * fuse_notify_poll() with the specified ph.
+     *
+     * Regardless of the number of times poll with a non-NULL ph
+     * is received, a single notification is enough to clear all.
+     * Notifying more times incurs overhead but doesn't harm
+     * correctness.
+     *
+     * The callee is responsible for destroying ph with
+     * fuse_pollhandle_destroy() when no longer in use.
+     */
+    int (*poll)(const char *, struct fuse_file_info *,
+                struct fuse_pollhandle *ph, unsigned *reventsp);
+
+    /*
+     * Write contents of buffer to an open file
+     *
+     * Similar to the write() method, but data is supplied in a
+     * generic buffer.  Use fuse_buf_copy() to transfer data to
+     * the destination.
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits.
+     */
+    int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off,
+                     struct fuse_file_info *);
+
+    /*
+     *  Store data from an open file in a buffer
+     *
+     * Similar to the read() method, but data is stored and
+     * returned in a generic buffer.
+     *
+     * No actual copying of data has to take place, the source
+     * file descriptor may simply be stored in the buffer for
+     * later data transfer.
+     *
+     * The buffer must be allocated dynamically and stored at the
+     * location pointed to by bufp.  If the buffer contains memory
+     * regions, they too must be allocated using malloc().  The
+     * allocated memory will be freed by the caller.
+     */
+    int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size,
+                    off_t off, struct fuse_file_info *);
+    /**
+     * Perform BSD file locking operation
+     *
+     * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN
+     *
+     * Nonblocking requests will be indicated by ORing LOCK_NB to
+     * the above operations
+     *
+     * For more information see the flock(2) manual page.
+     *
+     * Additionally fi->owner will be set to a value unique to
+     * this open file.  This same value will be supplied to
+     * ->release() when the file is released.
+     *
+     * Note: if this method is not implemented, the kernel will still
+     * allow file locking to work locally.  Hence it is only
+     * interesting for network filesystems and similar.
+     */
+    int (*flock)(const char *, struct fuse_file_info *, int op);
+
+    /**
+     * Allocates space for an open file
+     *
+     * This function ensures that required space is allocated for specified
+     * file.  If this function returns success then any subsequent write
+     * request to specified range is guaranteed not to fail because of lack
+     * of space on the file system media.
+     */
+    int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *);
+
+    /**
+     * Copy a range of data from one file to another
+     *
+     * Performs an optimized copy between two file descriptors without the
+     * additional cost of transferring data through the FUSE kernel module
+     * to user space (glibc) and then back into the FUSE filesystem again.
+     *
+     * In case this method is not implemented, glibc falls back to reading
+     * data from the source and writing to the destination, effectively
+     * doing an inefficient copy of the data.
+     */
+    ssize_t (*copy_file_range)(const char *path_in,
+                               struct fuse_file_info *fi_in, off_t offset_in,
+                               const char *path_out,
+                               struct fuse_file_info *fi_out, off_t offset_out,
+                               size_t size, int flags);
+
+    /**
+     * Find next data or hole after the specified offset
+     */
+    off_t (*lseek)(const char *, off_t off, int whence,
+                   struct fuse_file_info *);
+};
+
+/*
+ * Extra context that may be needed by some filesystems
+ *
+ * The uid, gid and pid fields are not filled in case of a writepage
+ * operation.
+ */
+struct fuse_context {
+    /** Pointer to the fuse object */
+    struct fuse *fuse;
+
+    /** User ID of the calling process (not filled in for writepage) */
+    uid_t uid;
+
+    /** Group ID of the calling process (not filled in for writepage) */
+    gid_t gid;
+
+    /** Process ID of the calling thread (not filled in for writepage) */
+    pid_t pid;
+
+    /** Private filesystem data; fuse_operations.init may override it */
+    void *private_data;
+
+    /** Umask of the calling process */
+    mode_t umask;
+};
+
+/**
+ * Main function of FUSE.
+ *
+ * This is for the lazy.  This is all that has to be called from the
+ * main() function.
+ *
+ * This function does the following:
+ *   - parses command line options, and handles --help and
+ *     --version
+ *   - installs signal handlers for INT, HUP, TERM and PIPE
+ *   - registers an exit handler to unmount the filesystem on program exit
+ *   - creates a fuse handle
+ *   - registers the operations
+ *   - calls either the single-threaded or the multi-threaded event loop
+ *
+ * Most file systems will have to parse some file-system specific
+ * arguments before calling this function. It is recommended to do
+ * this with fuse_opt_parse() and a processing function that passes
+ * through any unknown options (this can also be achieved by just
+ * passing NULL as the processing function). That way, the remaining
+ * options can be passed directly to fuse_main().
+ *
+ * fuse_main() accepts all options that can be passed to
+ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new().
+ *
+ * Option parsing skips argv[0], which is assumed to contain the
+ * program name. This element must always be present and is used to
+ * construct a basic ``usage: `` message for the --help
+ * output. argv[0] may also be set to the empty string. In this case
+ * the usage message is suppressed. This can be used by file systems
+ * to print their own usage line first. See hello.c for an example of
+ * how to do this.
+ *
+ * Note: this is currently implemented as a macro.
+ *
+ * The following error codes may be returned from fuse_main():
+ *   1: Invalid option arguments
+ *   2: No mount point specified
+ *   3: FUSE setup failed
+ *   4: Mounting failed
+ *   5: Failed to daemonize (detach from session)
+ *   6: Failed to set up signal handlers
+ *   7: An error occurred during the life of the file system
+ *
+ * @param argc the argument counter passed to the main() function
+ * @param argv the argument vector passed to the main() function
+ * @param op the file system operation
+ * @param private_data Initial value for the `private_data`
+ *            field of `struct fuse_context`. May be overridden by the
+ *            `struct fuse_operations.init` handler.
+ * @return 0 on success, nonzero on failure
+ *
+ * Example usage, see hello.c
+ */
+/*
+ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op,
+ * void *private_data);
+ */
+#define fuse_main(argc, argv, op, private_data) \
+    fuse_main_real(argc, argv, op, sizeof(*(op)), private_data)
+
+/*
+ * More detailed API
+ */
+
+/**
+ * Print available options (high- and low-level) to stdout.  This is
+ * not an exhaustive list, but includes only those options that may be
+ * of interest to an end-user of a file system.
+ *
+ * The function looks at the argument vector only to determine if
+ * there are additional modules to be loaded (module=foo option),
+ * and attempts to call their help functions as well.
+ *
+ * @param args the argument vector.
+ */
+void fuse_lib_help(struct fuse_args *args);
+
+/**
+ * Create a new FUSE filesystem.
+ *
+ * This function accepts most file-system independent mount options
+ * (like context, nodev, ro - see mount(8)), as well as the
+ * FUSE-specific mount options from mount.fuse(8).
+ *
+ * If the --help option is specified, the function writes a help text
+ * to stdout and returns NULL.
+ *
+ * Option parsing skips argv[0], which is assumed to contain the
+ * program name. This element must always be present and is used to
+ * construct a basic ``usage: `` message for the --help output. If
+ * argv[0] is set to the empty string, no usage message is included in
+ * the --help output.
+ *
+ * If an unknown option is passed in, an error message is written to
+ * stderr and the function returns NULL.
+ *
+ * @param args argument vector
+ * @param op the filesystem operations
+ * @param op_size the size of the fuse_operations structure
+ * @param private_data Initial value for the `private_data`
+ *            field of `struct fuse_context`. May be overridden by the
+ *            `struct fuse_operations.init` handler.
+ * @return the created FUSE handle
+ */
+#if FUSE_USE_VERSION == 30
+struct fuse *fuse_new_30(struct fuse_args *args,
+                         const struct fuse_operations *op, size_t op_size,
+                         void *private_data);
+#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data)
+#else
+struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op,
+                      size_t op_size, void *private_data);
+#endif
+
+/**
+ * Mount a FUSE file system.
+ *
+ * @param f the FUSE handle
+ * @param mountpoint the mount point path
+ *
+ * @return 0 on success, -1 on failure.
+ **/
+int fuse_mount(struct fuse *f, const char *mountpoint);
+
+/**
+ * Unmount a FUSE file system.
+ *
+ * See fuse_session_unmount() for additional information.
+ *
+ * @param f the FUSE handle
+ **/
+void fuse_unmount(struct fuse *f);
+
+/**
+ * Destroy the FUSE handle.
+ *
+ * NOTE: This function does not unmount the filesystem.  If this is
+ * needed, call fuse_unmount() before calling this function.
+ *
+ * @param f the FUSE handle
+ */
+void fuse_destroy(struct fuse *f);
+
+/**
+ * FUSE event loop.
+ *
+ * Requests from the kernel are processed, and the appropriate
+ * operations are called.
+ *
+ * For a description of the return value and the conditions when the
+ * event loop exits, refer to the documentation of
+ * fuse_session_loop().
+ *
+ * @param f the FUSE handle
+ * @return see fuse_session_loop()
+ *
+ * See also: fuse_loop_mt()
+ */
+int fuse_loop(struct fuse *f);
+
+/**
+ * Flag session as terminated
+ *
+ * This function will cause any running event loops to exit on
+ * the next opportunity.
+ *
+ * @param f the FUSE handle
+ */
+void fuse_exit(struct fuse *f);
+
+/**
+ * Get the current context
+ *
+ * The context is only valid for the duration of a filesystem
+ * operation, and thus must not be stored and used later.
+ *
+ * @return the context
+ */
+struct fuse_context *fuse_get_context(void);
+
+/**
+ * Get the current supplementary group IDs for the current request
+ *
+ * Similar to the getgroups(2) system call, except the return value is
+ * always the total number of group IDs, even if it is larger than the
+ * specified size.
+ *
+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass
+ * the group list to userspace, hence this function needs to parse
+ * "/proc/$TID/task/$TID/status" to get the group IDs.
+ *
+ * This feature may not be supported on all operating systems.  In
+ * such a case this function will return -ENOSYS.
+ *
+ * @param size size of given array
+ * @param list array of group IDs to be filled in
+ * @return the total number of supplementary group IDs or -errno on failure
+ */
+int fuse_getgroups(int size, gid_t list[]);
+
+/**
+ * Check if the current request has already been interrupted
+ *
+ * @return 1 if the request has been interrupted, 0 otherwise
+ */
+int fuse_interrupted(void);
+
+/**
+ * Invalidates cache for the given path.
+ *
+ * This calls fuse_lowlevel_notify_inval_inode internally.
+ *
+ * @return 0 on successful invalidation, negative error value otherwise.
+ *         This routine may return -ENOENT to indicate that there was
+ *         no entry to be invalidated, e.g., because the path has not
+ *         been seen before or has been forgotten; this should not be
+ *         considered to be an error.
+ */
+int fuse_invalidate_path(struct fuse *f, const char *path);
+
+/**
+ * The real main function
+ *
+ * Do not call this directly, use fuse_main()
+ */
+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op,
+                   size_t op_size, void *private_data);
+
+/**
+ * Start the cleanup thread when using option "remember".
+ *
+ * This is done automatically by fuse_loop_mt()
+ * @param fuse struct fuse pointer for fuse instance
+ * @return 0 on success and -1 on error
+ */
+int fuse_start_cleanup_thread(struct fuse *fuse);
+
+/**
+ * Stop the cleanup thread when using option "remember".
+ *
+ * This is done automatically by fuse_loop_mt()
+ * @param fuse struct fuse pointer for fuse instance
+ */
+void fuse_stop_cleanup_thread(struct fuse *fuse);
+
+/**
+ * Iterate over the cache, removing stale entries.
+ * Use in conjunction with "-oremember".
+ *
+ * NOTE: This is already done for the standard sessions
+ *
+ * @param fuse struct fuse pointer for fuse instance
+ * @return the number of seconds until the next cleanup
+ */
+int fuse_clean_cache(struct fuse *fuse);
+
+/*
+ * Stacking API
+ */
+
+/**
+ * Fuse filesystem object
+ *
+ * This opaque object represents a filesystem layer
+ */
+struct fuse_fs;
+
+/*
+ * These functions call the relevant filesystem operation, and return
+ * the result.
+ *
+ * If the operation is not defined, they return -ENOSYS, with the
+ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir,
+ * fuse_fs_releasedir and fuse_fs_statfs, which return 0.
+ */
+
+int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf,
+                    struct fuse_file_info *fi);
+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath,
+                   unsigned int flags);
+int fuse_fs_unlink(struct fuse_fs *fs, const char *path);
+int fuse_fs_rmdir(struct fuse_fs *fs, const char *path);
+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path);
+int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath);
+int fuse_fs_release(struct fuse_fs *fs, const char *path,
+                    struct fuse_file_info *fi);
+int fuse_fs_open(struct fuse_fs *fs, const char *path,
+                 struct fuse_file_info *fi);
+int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size,
+                 off_t off, struct fuse_file_info *fi);
+int fuse_fs_read_buf(struct fuse_fs *fs, const char *path,
+                     struct fuse_bufvec **bufp, size_t size, off_t off,
+                     struct fuse_file_info *fi);
+int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf,
+                  size_t size, off_t off, struct fuse_file_info *fi);
+int fuse_fs_write_buf(struct fuse_fs *fs, const char *path,
+                      struct fuse_bufvec *buf, off_t off,
+                      struct fuse_file_info *fi);
+int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync,
+                  struct fuse_file_info *fi);
+int fuse_fs_flush(struct fuse_fs *fs, const char *path,
+                  struct fuse_file_info *fi);
+int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf);
+int fuse_fs_opendir(struct fuse_fs *fs, const char *path,
+                    struct fuse_file_info *fi);
+int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf,
+                    fuse_fill_dir_t filler, off_t off,
+                    struct fuse_file_info *fi, enum fuse_readdir_flags flags);
+int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync,
+                     struct fuse_file_info *fi);
+int fuse_fs_releasedir(struct fuse_fs *fs, const char *path,
+                       struct fuse_file_info *fi);
+int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode,
+                   struct fuse_file_info *fi);
+int fuse_fs_lock(struct fuse_fs *fs, const char *path,
+                 struct fuse_file_info *fi, int cmd, struct flock *lock);
+int fuse_fs_flock(struct fuse_fs *fs, const char *path,
+                  struct fuse_file_info *fi, int op);
+int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode,
+                  struct fuse_file_info *fi);
+int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid,
+                  struct fuse_file_info *fi);
+int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size,
+                     struct fuse_file_info *fi);
+int fuse_fs_utimens(struct fuse_fs *fs, const char *path,
+                    const struct timespec tv[2], struct fuse_file_info *fi);
+int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask);
+int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf,
+                     size_t len);
+int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode,
+                  dev_t rdev);
+int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode);
+int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name,
+                     const char *value, size_t size, int flags);
+int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name,
+                     char *value, size_t size);
+int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list,
+                      size_t size);
+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name);
+int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize,
+                 uint64_t *idx);
+int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd,
+                  void *arg, struct fuse_file_info *fi, unsigned int flags,
+                  void *data);
+int fuse_fs_poll(struct fuse_fs *fs, const char *path,
+                 struct fuse_file_info *fi, struct fuse_pollhandle *ph,
+                 unsigned *reventsp);
+int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode,
+                      off_t offset, off_t length, struct fuse_file_info *fi);
+ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in,
+                                struct fuse_file_info *fi_in, off_t off_in,
+                                const char *path_out,
+                                struct fuse_file_info *fi_out, off_t off_out,
+                                size_t len, int flags);
+off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence,
+                    struct fuse_file_info *fi);
+void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn,
+                  struct fuse_config *cfg);
+void fuse_fs_destroy(struct fuse_fs *fs);
+
+int fuse_notify_poll(struct fuse_pollhandle *ph);
+
+/**
+ * Create a new fuse filesystem object
+ *
+ * This is usually called from the factory of a fuse module to create
+ * a new instance of a filesystem.
+ *
+ * @param op the filesystem operations
+ * @param op_size the size of the fuse_operations structure
+ * @param private_data Initial value for the `private_data`
+ *            field of `struct fuse_context`. May be overridden by the
+ *            `struct fuse_operations.init` handler.
+ * @return a new filesystem object
+ */
+struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size,
+                            void *private_data);
+
+/**
+ * Factory for creating filesystem objects
+ *
+ * The function may use and remove options from 'args' that belong
+ * to this module.
+ *
+ * For now the 'fs' vector always contains exactly one filesystem.
+ * This is the filesystem which will be below the newly created
+ * filesystem in the stack.
+ *
+ * @param args the command line arguments
+ * @param fs NULL terminated filesystem object vector
+ * @return the new filesystem object
+ */
+typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args,
+                                                 struct fuse_fs *fs[]);
+/**
+ * Register filesystem module
+ *
+ * If the "-omodules=*name*_:..." option is present, filesystem
+ * objects are created and pushed onto the stack with the *factory_*
+ * function.
+ *
+ * @param name_ the name of this filesystem module
+ * @param factory_ the factory function for this filesystem module
+ */
+#define FUSE_REGISTER_MODULE(name_, factory_) \
+    fuse_module_factory_t fuse_module_##name_##_factory = factory_
+
+/** Get session from fuse object */
+struct fuse_session *fuse_get_session(struct fuse *f);
+
+/**
+ * Open a FUSE file descriptor and set up the mount for the given
+ * mountpoint and flags.
+ *
+ * @param mountpoint reference to the mount in the file system
+ * @param options mount options
+ * @return the FUSE file descriptor or -1 upon error
+ */
+int fuse_open_channel(const char *mountpoint, const char *options);
+
+#endif /* FUSE_H_ */
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
new file mode 100644
index 0000000000..686c42c0a5
--- /dev/null
+++ b/tools/virtiofsd/fuse_common.h
@@ -0,0 +1,816 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+/** @file */
+
+#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_)
+#error \
+    "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead."
+#endif
+
+#ifndef FUSE_COMMON_H_
+#define FUSE_COMMON_H_
+
+#include "fuse_log.h"
+#include "fuse_opt.h"
+#include <stdint.h>
+#include <sys/types.h>
+
+/** Major version of FUSE library interface */
+#define FUSE_MAJOR_VERSION 3
+
+/** Minor version of FUSE library interface */
+#define FUSE_MINOR_VERSION 2
+
+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min))
+#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION)
+
+/**
+ * Information about an open file.
+ *
+ * File Handles are created by the open, opendir, and create methods and closed
+ * by the release and releasedir methods.  Multiple file handles may be
+ * concurrently open for the same file.  Generally, a client will create one
+ * file handle per file descriptor, though in some cases multiple file
+ * descriptors can share a single file handle.
+ */
+struct fuse_file_info {
+    /** Open flags.  Available in open() and release() */
+    int flags;
+
+    /*
+     * In case of a write operation, indicates if this was caused
+     * by a delayed write from the page cache.  If so, then the
+     * context's pid, uid, and gid fields will not be valid, and
+     * the *fh* value may not match the *fh* value that would
+     * have been sent with the corresponding individual write
+     * requests if write caching had been disabled.
+     */
+    unsigned int writepage:1;
+
+    /** Can be filled in by open, to use direct I/O on this file. */
+    unsigned int direct_io:1;
+
+    /*
+     * Can be filled in by open.  It signals the kernel that any
+     * currently cached file data (i.e., data that the filesystem
+     * provided the last time the file was open) need not be
+     * invalidated.  Has no effect when set in other contexts (in
+     * particular it does nothing when set by opendir()).
+     */
+    unsigned int keep_cache:1;
+
+    /*
+     * Indicates a flush operation.  Set in flush operation, also
+     * may be set in highlevel lock operation and lowlevel release
+     * operation.
+     */
+    unsigned int flush:1;
+
+    /*
+     * Can be filled in by open, to indicate that the file is not
+     * seekable.
+     */
+    unsigned int nonseekable:1;
+
+    /*
+     * Indicates that flock locks for this file should be
+     * released.  If set, lock_owner shall contain a valid value.
+     * May only be set in ->release().
+     */
+    unsigned int flock_release:1;
+
+    /*
+     * Can be filled in by opendir.  It signals the kernel to
+     * enable caching of entries returned by readdir().  Has no
+     * effect when set in other contexts (in particular it does
+     * nothing when set by open()).
+     */
+    unsigned int cache_readdir:1;
+
+    /* Indicates that suid/sgid bits should be removed upon write */
+    unsigned int kill_priv:1;
+
+
+    /** Padding after the eight 1-bit flags.  Reserved for future use */
+    unsigned int padding:24;
+    unsigned int padding2:32;
+
+    /*
+     * File handle id.  May be filled in by filesystem in create,
+     * open, and opendir().  Available in most other file operations on the
+     * same file handle.
+     */
+    uint64_t fh;
+
+    /** Lock owner id.  Available in locking operations and flush */
+    uint64_t lock_owner;
+
+    /*
+     * Requested poll events.  Available in ->poll.  Only set on kernels
+     * which support it.  If unsupported, this field is set to zero.
+     */
+    uint32_t poll_events;
+};
+
+/*
+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want'
+ */
+
+/**
+ * Indicates that the filesystem supports asynchronous read requests.
+ *
+ * If this capability is not requested/available, the kernel will
+ * ensure that there is at most one pending read request per
+ * file-handle at any time, and will attempt to order read requests by
+ * increasing offset.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_ASYNC_READ (1 << 0)
+
+/**
+ * Indicates that the filesystem supports "remote" locking.
+ *
+ * This feature is enabled by default when supported by the kernel,
+ * and if getlk() and setlk() handlers are implemented.
+ */
+#define FUSE_CAP_POSIX_LOCKS (1 << 1)
+
+/**
+ * Indicates that the filesystem supports the O_TRUNC open flag.  If
+ * disabled, and an application specifies O_TRUNC, fuse first calls
+ * truncate() and then open() with O_TRUNC filtered out.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3)
+
+/**
+ * Indicates that the filesystem supports lookups of "." and "..".
+ *
+ * This feature is disabled by default.
+ */
+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4)
+
+/**
+ * Indicates that the kernel should not apply the umask to the
+ * file mode on create operations.
+ *
+ * This feature is disabled by default.
+ */
+#define FUSE_CAP_DONT_MASK (1 << 6)
+
+/**
+ * Indicates that libfuse should try to use splice() when writing to
+ * the fuse device. This may improve performance.
+ *
+ * This feature is disabled by default.
+ */
+#define FUSE_CAP_SPLICE_WRITE (1 << 7)
+
+/**
+ * Indicates that libfuse should try to move pages instead of copying when
+ * writing to / reading from the fuse device. This may improve performance.
+ *
+ * This feature is disabled by default.
+ */
+#define FUSE_CAP_SPLICE_MOVE (1 << 8)
+
+/**
+ * Indicates that libfuse should try to use splice() when reading from
+ * the fuse device. This may improve performance.
+ *
+ * This feature is enabled by default when supported by the kernel and
+ * if the filesystem implements a write_buf() handler.
+ */
+#define FUSE_CAP_SPLICE_READ (1 << 9)
+
+/**
+ * If set, the calls to flock(2) will be emulated using POSIX locks and must
+ * then be handled by the filesystem's setlk() handler.
+ *
+ * If not set, flock(2) calls will be handled by the FUSE kernel module
+ * internally (so any access that does not go through the kernel cannot be taken
+ * into account).
+ *
+ * This feature is enabled by default when supported by the kernel and
+ * if the filesystem implements a flock() handler.
+ */
+#define FUSE_CAP_FLOCK_LOCKS (1 << 10)
+
+/**
+ * Indicates that the filesystem supports ioctl's on directories.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_IOCTL_DIR (1 << 11)
+
+/**
+ * Traditionally, while a file is open the FUSE kernel module only
+ * asks the filesystem for an update of the file's attributes when a
+ * client attempts to read beyond EOF. This is unsuitable for
+ * e.g. network filesystems, where the file contents may change
+ * without the kernel knowing about it.
+ *
+ * If this flag is set, FUSE will check the validity of the attributes
+ * on every read. If the attributes are no longer valid (i.e., if the
+ * *attr_timeout* passed to fuse_reply_attr() or set in `struct
+ * fuse_entry_param` has passed), it will first issue a `getattr`
+ * request. If the new mtime differs from the previous value, any
+ * cached file *contents* will be invalidated as well.
+ *
+ * This flag should always be set when available. If all file changes
+ * go through the kernel, *attr_timeout* should be set to a very large
+ * number to avoid unnecessary getattr() calls.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12)
+
+/**
+ * Indicates that the filesystem supports readdirplus.
+ *
+ * This feature is enabled by default when supported by the kernel and if the
+ * filesystem implements a readdirplus() handler.
+ */
+#define FUSE_CAP_READDIRPLUS (1 << 13)
+
+/**
+ * Indicates that the filesystem supports adaptive readdirplus.
+ *
+ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect.
+ *
+ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel
+ * will always issue readdirplus() requests to retrieve directory
+ * contents.
+ *
+ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel
+ * will issue both readdir() and readdirplus() requests, depending on
+ * how much information is expected to be required.
+ *
+ * As of Linux 4.20, the algorithm is as follows: when userspace
+ * starts to read directory entries, issue a READDIRPLUS request to
+ * the filesystem. If any entry attributes have been looked up by the
+ * time userspace requests the next batch of entries continue with
+ * READDIRPLUS, otherwise switch to plain READDIR.  This will result
+ * in eg plain "ls" triggering READDIRPLUS first then READDIR after
+ * that because it doesn't do lookups.  "ls -l" should result in all
+ * READDIRPLUS, except if dentries are already cached.
+ *
+ * This feature is enabled by default when supported by the kernel and
+ * if the filesystem implements both a readdirplus() and a readdir()
+ * handler.
+ */
+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14)
+
+/**
+ * Indicates that the filesystem supports asynchronous direct I/O submission.
+ *
+ * If this capability is not requested/available, the kernel will ensure that
+ * there is at most one pending read and one pending write request per direct
+ * I/O file-handle at any time.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_ASYNC_DIO (1 << 15)
+
+/**
+ * Indicates that writeback caching should be enabled. This means that
+ * individual write requests may be buffered and merged in the kernel
+ * before they are sent to the filesystem.
+ *
+ * This feature is disabled by default.
+ */
+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16)
+
+/**
+ * Indicates support for zero-message opens. If this flag is set in
+ * the `capable` field of the `fuse_conn_info` structure, then the
+ * filesystem may return `ENOSYS` from the open() handler to indicate
+ * success. Further attempts to open files will be handled in the
+ * kernel. (If this flag is not set, returning ENOSYS will be treated
+ * as an error and signaled to the caller).
+ *
+ * Setting (or unsetting) this flag in the `want` field has *no
+ * effect*.
+ */
+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17)
+
+/**
+ * Indicates support for parallel directory operations. If this flag
+ * is unset, the FUSE kernel module will ensure that lookup() and
+ * readdir() requests are never issued concurrently for the same
+ * directory.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18)
+
+/**
+ * Indicates support for POSIX ACLs.
+ *
+ * If this feature is enabled, the kernel will cache and have
+ * responsibility for enforcing ACLs. ACL will be stored as xattrs and
+ * passed to userspace, which is responsible for updating the ACLs in
+ * the filesystem, keeping the file mode in sync with the ACL, and
+ * ensuring inheritance of default ACLs when new filesystem nodes are
+ * created. Note that this requires that the file system is able to
+ * parse and interpret the xattr representation of ACLs.
+ *
+ * Enabling this feature implicitly turns on the
+ * ``default_permissions`` mount option (even if it was not passed to
+ * mount(2)).
+ *
+ * This feature is disabled by default.
+ */
+#define FUSE_CAP_POSIX_ACL (1 << 19)
+
+/**
+ * Indicates that the filesystem is responsible for unsetting
+ * setuid and setgid bits when a file is written, truncated, or
+ * its owner is changed.
+ *
+ * This feature is enabled by default when supported by the kernel.
+ */
+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20)
+
+/**
+ * Indicates support for zero-message opendirs. If this flag is set in
+ * the `capable` field of the `fuse_conn_info` structure, then the filesystem
+ * may return `ENOSYS` from the opendir() handler to indicate success. Further
+ * opendir and releasedir messages will be handled in the kernel. (If this
+ * flag is not set, returning ENOSYS will be treated as an error and signalled
+ * to the caller.)
+ *
+ * Setting (or unsetting) this flag in the `want` field has *no effect*.
+ */
+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24)
+
+/**
+ * Ioctl flags
+ *
+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
+ * FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_DIR: is a directory
+ *
+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
+ */
+#define FUSE_IOCTL_COMPAT (1 << 0)
+#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
+#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_DIR (1 << 4)
+
+#define FUSE_IOCTL_MAX_IOV 256
+
+/**
+ * Connection information, passed to the ->init() method
+ *
+ * Some of the elements are read-write, these can be changed to
+ * indicate the value requested by the filesystem.  The requested
+ * value must usually be smaller than the indicated value.
+ */
+struct fuse_conn_info {
+    /**
+     * Major version of the protocol (read-only)
+     */
+    unsigned proto_major;
+
+    /**
+     * Minor version of the protocol (read-only)
+     */
+    unsigned proto_minor;
+
+    /**
+     * Maximum size of the write buffer
+     */
+    unsigned max_write;
+
+    /**
+     * Maximum size of read requests. A value of zero indicates no
+     * limit. However, even if the filesystem does not specify a
+     * limit, the maximum size of read requests will still be
+     * limited by the kernel.
+     *
+     * NOTE: For the time being, the maximum size of read requests
+     * must be set both here *and* passed to fuse_session_new()
+     * using the ``-o max_read=<n>`` mount option. At some point
+     * in the future, specifying the mount option will no longer
+     * be necessary.
+     */
+    unsigned max_read;
+
+    /**
+     * Maximum readahead
+     */
+    unsigned max_readahead;
+
+    /**
+     * Capability flags that the kernel supports (read-only)
+     */
+    unsigned capable;
+
+    /**
+     * Capability flags that the filesystem wants to enable.
+     *
+     * libfuse attempts to initialize this field with
+     * reasonable default values before calling the init() handler.
+     */
+    unsigned want;
+
+    /**
+     * Maximum number of pending "background" requests. A
+     * background request is any type of request for which the
+     * total number is not limited by other means. As of kernel
+     * 4.8, only two types of requests fall into this category:
+     *
+     *   1. Read-ahead requests
+     *   2. Asynchronous direct I/O requests
+     *
+     * Read-ahead requests are generated (if max_readahead is
+     * non-zero) by the kernel to preemptively fill its caches
+     * when it anticipates that userspace will soon read more
+     * data.
+     *
+     * Asynchronous direct I/O requests are generated if
+     * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large
+     * direct I/O request. In this case the kernel will internally
+     * split it up into multiple smaller requests and submit them
+     * to the filesystem concurrently.
+     *
+     * Note that the following requests are *not* background
+     * requests: writeback requests (limited by the kernel's
+     * flusher algorithm), regular (i.e., synchronous and
+     * buffered) userspace read/write requests (limited to one per
+     * thread), asynchronous read requests (Linux's io_submit(2)
+     * call actually blocks, so these are also limited to one per
+     * thread).
+     */
+    unsigned max_background;
+
+    /**
+     * Kernel congestion threshold parameter. If the number of pending
+     * background requests exceeds this number, the FUSE kernel module will
+     * mark the filesystem as "congested". This instructs the kernel to
+     * expect that queued requests will take some time to complete, and to
+     * adjust its algorithms accordingly (e.g. by putting a waiting thread
+     * to sleep instead of using a busy-loop).
+     */
+    unsigned congestion_threshold;
+
+    /**
+     * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible
+     * for updating mtime and ctime when write requests are received. The
+     * updated values are passed to the filesystem with setattr() requests.
+     * However, if the filesystem does not support the full resolution of
+     * the kernel timestamps (nanoseconds), the mtime and ctime values used
+     * by kernel and filesystem will differ (and result in an apparent
+     * change of times after a cache flush).
+     *
+     * To prevent this problem, this variable can be used to inform the
+     * kernel about the timestamp granularity supported by the file-system.
+     * The value should be power of 10.  The default is 1, i.e. full
+     * nano-second resolution. Filesystems supporting only second resolution
+     * should set this to 1000000000.
+     */
+    unsigned time_gran;
+
+    /**
+     * For future use.
+     */
+    unsigned reserved[22];
+};
+
+struct fuse_session;
+struct fuse_pollhandle;
+struct fuse_conn_info_opts;
+
+/**
+ * This function parses several command-line options that can be used
+ * to override elements of struct fuse_conn_info. The pointer returned
+ * by this function should be passed to the
+ * fuse_apply_conn_info_opts() method by the file system's init()
+ * handler.
+ *
+ * Before using this function, think twice if you really want these
+ * parameters to be adjustable from the command line. In most cases,
+ * they should be determined by the file system internally.
+ *
+ * The following options are recognized:
+ *
+ *   -o max_write=N         sets conn->max_write
+ *   -o max_readahead=N     sets conn->max_readahead
+ *   -o max_background=N    sets conn->max_background
+ *   -o congestion_threshold=N  sets conn->congestion_threshold
+ *   -o async_read          sets FUSE_CAP_ASYNC_READ in conn->want
+ *   -o sync_read           unsets FUSE_CAP_ASYNC_READ in conn->want
+ *   -o atomic_o_trunc      sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want
+ *   -o no_remote_lock    Equivalent to -o no_remote_flock,no_remote_posix_lock
+ *   -o no_remote_flock     Unsets FUSE_CAP_FLOCK_LOCKS in conn->want
+ *   -o no_remote_posix_lock  Unsets FUSE_CAP_POSIX_LOCKS in conn->want
+ *   -o [no_]splice_write     (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want
+ *   -o [no_]splice_move      (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want
+ *   -o [no_]splice_read      (un-)sets FUSE_CAP_SPLICE_READ in conn->want
+ *   -o [no_]auto_inval_data  (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want
+ *   -o readdirplus=no        unsets FUSE_CAP_READDIRPLUS in conn->want
+ *   -o readdirplus=yes       sets FUSE_CAP_READDIRPLUS and unsets
+ *                            FUSE_CAP_READDIRPLUS_AUTO in conn->want
+ *   -o readdirplus=auto      sets both of those flags in conn->want
+ *   -o [no_]async_dio        (un-)sets FUSE_CAP_ASYNC_DIO in conn->want
+ *   -o [no_]writeback_cache  (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want
+ *   -o time_gran=N           sets conn->time_gran
+ *
+ * Known options will be removed from *args*, unknown options will be
+ * passed through unchanged.
+ *
+ * @param args argument vector (input+output)
+ * @return parsed options
+ **/
+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args);
+
+/**
+ * This function applies the (parsed) parameters in *opts* to the
+ * *conn* pointer. It may modify the following fields: want,
+ * max_write, max_readahead, congestion_threshold, max_background,
+ * time_gran. A field is only set (or unset) if the corresponding
+ * option has been explicitly set.
+ */
+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
+                               struct fuse_conn_info *conn);
+
+/**
+ * Go into the background
+ *
+ * @param foreground if true, stay in the foreground
+ * @return 0 on success, -1 on failure
+ */
+int fuse_daemonize(int foreground);
+
+/**
+ * Get the version of the library
+ *
+ * @return the version
+ */
+int fuse_version(void);
+
+/**
+ * Get the full package version string of the library
+ *
+ * @return the package version
+ */
+const char *fuse_pkgversion(void);
+
+/**
+ * Destroy poll handle
+ *
+ * @param ph the poll handle
+ */
+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph);
+
+/*
+ * Data buffer
+ */
+
+/**
+ * Buffer flags
+ */
+enum fuse_buf_flags {
+    /**
+     * Buffer contains a file descriptor
+     *
+     * If this flag is set, the .fd field is valid, otherwise the
+     * .mem field is valid.
+     */
+    FUSE_BUF_IS_FD = (1 << 1),
+
+    /**
+     * Seek on the file descriptor
+     *
+     * If this flag is set then the .pos field is valid and is
+     * used to seek to the given offset before performing
+     * operation on file descriptor.
+     */
+    FUSE_BUF_FD_SEEK = (1 << 2),
+
+    /**
+     * Retry operation on file descriptor
+     *
+     * If this flag is set then retry operation on file descriptor
+     * until .size bytes have been copied or an error or EOF is
+     * detected.
+     */
+    FUSE_BUF_FD_RETRY = (1 << 3),
+};
+
+/**
+ * Single data buffer
+ *
+ * Generic data buffer for I/O, extended attributes, etc...  Data may
+ * be supplied as a memory pointer or as a file descriptor
+ */
+struct fuse_buf {
+    /**
+     * Size of data in bytes
+     */
+    size_t size;
+
+    /**
+     * Buffer flags
+     */
+    enum fuse_buf_flags flags;
+
+    /**
+     * Memory pointer
+     *
+     * Used unless FUSE_BUF_IS_FD flag is set.
+     */
+    void *mem;
+
+    /**
+     * File descriptor
+     *
+     * Used if FUSE_BUF_IS_FD flag is set.
+     */
+    int fd;
+
+    /**
+     * File position
+     *
+     * Used if FUSE_BUF_FD_SEEK flag is set.
+     */
+    off_t pos;
+};
+
+/**
+ * Data buffer vector
+ *
+ * An array of data buffers, each containing a memory pointer or a
+ * file descriptor.
+ *
+ * Allocate dynamically to add more than one buffer.
+ */
+struct fuse_bufvec {
+    /**
+     * Number of buffers in the array
+     */
+    size_t count;
+
+    /**
+     * Index of current buffer within the array
+     */
+    size_t idx;
+
+    /**
+     * Current offset within the current buffer
+     */
+    size_t off;
+
+    /**
+     * Array of buffers
+     */
+    struct fuse_buf buf[1];
+};
+
+/* Initialize bufvec with a single buffer of given size */
+#define FUSE_BUFVEC_INIT(size__)                                      \
+    ((struct fuse_bufvec){ /* .count= */ 1,                           \
+                           /* .idx =  */ 0,                           \
+                           /* .off =  */ 0, /* .buf =  */             \
+                           { /* [0] = */ {                            \
+                               /* .size =  */ (size__),               \
+                               /* .flags = */ (enum fuse_buf_flags)0, \
+                               /* .mem =   */ NULL,                   \
+                               /* .fd =    */ -1,                     \
+                               /* .pos =   */ 0,                      \
+                           } } })
+
+/**
+ * Get total size of data in a fuse buffer vector
+ *
+ * @param bufv buffer vector
+ * @return size of data
+ */
+size_t fuse_buf_size(const struct fuse_bufvec *bufv);
+
+/**
+ * Copy data from one buffer vector to another
+ *
+ * @param dst destination buffer vector
+ * @param src source buffer vector
+ * @return actual number of bytes copied or -errno on error
+ */
+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
+
+/**
+ * Memory buffer iterator
+ *
+ */
+struct fuse_mbuf_iter {
+    /**
+     * Data pointer
+     */
+    void *mem;
+
+    /**
+     * Total length, in bytes
+     */
+    size_t size;
+
+    /**
+     * Offset from start of buffer
+     */
+    size_t pos;
+};
+
+/* Init iterator from a struct fuse_buf pointer (fbuf is evaluated twice) */
+#define FUSE_MBUF_ITER_INIT(fbuf) \
+    ((struct fuse_mbuf_iter){     \
+        .mem = (fbuf)->mem,       \
+        .size = (fbuf)->size,     \
+        .pos = 0,                 \
+    })
+
+/**
+ * Consume bytes from a memory buffer iterator
+ *
+ * @param iter memory buffer iterator
+ * @param len number of bytes to consume
+ * @return pointer to start of consumed bytes or
+ *         NULL if advancing beyond end of buffer
+ */
+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len);
+
+/**
+ * Consume a NUL-terminated string from a memory buffer iterator
+ *
+ * @param iter memory buffer iterator
+ * @return pointer to the string or
+ *         NULL if advancing beyond end of buffer or there is no NUL-terminator
+ */
+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter);
+
+/*
+ * Signal handling
+ */
+/**
+ * Exit session on HUP, TERM and INT signals and ignore PIPE signal
+ *
+ * Stores session in a global variable. May only be called once per
+ * process until fuse_remove_signal_handlers() is called.
+ *
+ * Once any of the HUP, TERM or INT signals arrives, the signal handler calls
+ * fuse_session_exit().
+ *
+ * @param se the session to exit
+ * @return 0 on success, -1 on failure
+ *
+ * See also:
+ * fuse_remove_signal_handlers()
+ */
+int fuse_set_signal_handlers(struct fuse_session *se);
+
+/**
+ * Restore default signal handlers
+ *
+ * Resets global session.  After this fuse_set_signal_handlers() may
+ * be called again.
+ *
+ * @param se the same session as given in fuse_set_signal_handlers()
+ *
+ * See also:
+ * fuse_set_signal_handlers()
+ */
+void fuse_remove_signal_handlers(struct fuse_session *se);
+
+/*
+ * Compatibility stuff
+ */
+
+#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30
+#error only API version 30 or greater is supported
+#endif
+
+
+/*
+ * This interface uses 64 bit off_t.
+ *
+ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags!
+ */
+
+#if defined(__GNUC__) &&                                      \
+    (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \
+    !defined __cplusplus
+_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit");
+#else
+struct _fuse_off_t_must_be_64bit_dummy_struct {
+    unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1);
+};
+#endif
+
+#endif /* FUSE_COMMON_H_ */
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
new file mode 100644
index 0000000000..4e47e5880d
--- /dev/null
+++ b/tools/virtiofsd/fuse_i.h
@@ -0,0 +1,115 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#ifndef FUSE_I_H
+#define FUSE_I_H
+
+#define FUSE_USE_VERSION 31
+#include "fuse.h"
+#include "fuse_lowlevel.h"
+
+struct fv_VuDev;
+struct fv_QueueInfo;
+
+struct fuse_req {
+    struct fuse_session *se; /* session the request was allocated for */
+    uint64_t unique; /* request id, copied into the reply header */
+    int ctr; /* reference count: starts at 1, request destroyed at 0 */
+    pthread_mutex_t lock;
+    struct fuse_ctx ctx;
+    struct fuse_chan *ch; /* reply channel; reset to NULL by fuse_free_req() */
+    int interrupted;
+    unsigned int ioctl_64bit:1;
+    union {
+        struct {
+            uint64_t unique;
+        } i;
+        struct {
+            fuse_interrupt_func_t func;
+            void *data;
+        } ni; /* func and data are cleared by fuse_free_req() */
+    } u;
+    struct fuse_req *next; /* circular doubly-linked list links */
+    struct fuse_req *prev;
+};
+
+struct fuse_notify_req {
+    uint64_t unique;
+    void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t,
+                  const void *, const struct fuse_buf *);
+    struct fuse_notify_req *next;
+    struct fuse_notify_req *prev;
+};
+
+struct fuse_session {
+    char *mountpoint;
+    volatile int exited;
+    int fd;
+    int debug;
+    int deny_others;
+    struct fuse_lowlevel_ops op;
+    int got_init;
+    struct cuse_data *cuse_data;
+    void *userdata;
+    uid_t owner;
+    struct fuse_conn_info conn;
+    struct fuse_req list; /* NOTE(review): presumably list head - confirm */
+    struct fuse_req interrupts; /* NOTE(review): presumably list head too */
+    pthread_mutex_t lock; /* held by fuse_free_req() to unlink and unref */
+    pthread_rwlock_t init_rwlock;
+    int got_destroy;
+    int broken_splice_nonblock;
+    uint64_t notify_ctr;
+    struct fuse_notify_req notify_list;
+    size_t bufsize;
+    int error;
+    char *vu_socket_path; /* vu_*: presumably vhost-user state - confirm */
+    int   vu_listen_fd;
+    int   vu_socketfd;
+    struct fv_VuDev *virtio_dev;
+    int thread_pool_size;
+};
+
+struct fuse_chan {
+    pthread_mutex_t lock;
+    int ctr;
+    int fd;
+    struct fv_QueueInfo *qi;
+};
+
+/**
+ * Filesystem module
+ *
+ * Filesystem modules are registered with the FUSE_REGISTER_MODULE()
+ * macro.
+ *
+ */
+struct fuse_module {
+    char *name;
+    fuse_module_factory_t factory;
+    struct fuse_module *next;
+    struct fusemod_so *so;
+    int ctr;
+};
+
+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
+                               int count);
+void fuse_free_req(fuse_req_t req);
+
+void fuse_session_process_buf_int(struct fuse_session *se,
+                                  struct fuse_bufvec *bufv,
+                                  struct fuse_chan *ch);
+
+
+#define FUSE_MAX_MAX_PAGES 256
+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
+
+/* room needed in buffer to accommodate header */
+#define FUSE_BUFFER_HEADER_SIZE 0x1000
+
+#endif
diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c
new file mode 100644
index 0000000000..c301ff6da1
--- /dev/null
+++ b/tools/virtiofsd/fuse_log.c
@@ -0,0 +1,41 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2019  Red Hat, Inc.
+ *
+ * Logging API.
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "qemu/osdep.h"
+#include "fuse_log.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+static void default_log_func(__attribute__((unused)) enum fuse_log_level level,
+                             const char *fmt, va_list ap)
+{
+    vfprintf(stderr, fmt, ap); /* severity is ignored by the default sink */
+}
+
+static fuse_log_func_t log_func = default_log_func;
+
+/*
+ * Install @func as the process-wide log handler.
+ * Passing NULL switches back to the built-in stderr writer.
+ */
+void fuse_set_log_func(fuse_log_func_t func)
+{
+    log_func = func ? func : default_log_func;
+}
+
+/* Format-and-forward: hand @fmt plus varargs to the installed log handler */
+void fuse_log(enum fuse_log_level level, const char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    log_func(level, fmt, args);
+    va_end(args);
+}
diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h
new file mode 100644
index 0000000000..bf6c11ff11
--- /dev/null
+++ b/tools/virtiofsd/fuse_log.h
@@ -0,0 +1,74 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2019  Red Hat, Inc.
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+#ifndef FUSE_LOG_H_
+#define FUSE_LOG_H_
+
+/** @file
+ *
+ * This file defines the logging interface of FUSE
+ */
+
+#include <stdarg.h>
+
+/**
+ * Log severity level
+ *
+ * These levels correspond to syslog(2) log levels since they are widely used.
+ */
+enum fuse_log_level {
+    FUSE_LOG_EMERG,
+    FUSE_LOG_ALERT,
+    FUSE_LOG_CRIT,
+    FUSE_LOG_ERR,
+    FUSE_LOG_WARNING,
+    FUSE_LOG_NOTICE,
+    FUSE_LOG_INFO,
+    FUSE_LOG_DEBUG
+};
+
+/**
+ * Log message handler function.
+ *
+ * This function must be thread-safe.  It may be called from any libfuse
+ * function, including fuse_parse_cmdline() and other functions invoked before
+ * a FUSE filesystem is created.
+ *
+ * Install a custom log message handler function using fuse_set_log_func().
+ *
+ * @param level log severity level
+ * @param fmt sprintf-style format string including newline
+ * @param ap format string arguments
+ */
+typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt,
+                                va_list ap);
+
+/**
+ * Install a custom log handler function.
+ *
+ * Log messages are emitted by libfuse functions to report errors and debug
+ * information.  Messages are printed to stderr by default but this can be
+ * overridden by installing a custom log message handler function.
+ *
+ * The log message handler function is global and affects all FUSE filesystems
+ * created within this process.
+ *
+ * @param func a custom log message handler function or NULL to revert to
+ *             the default
+ */
+void fuse_set_log_func(fuse_log_func_t func);
+
+/**
+ * Emit a log message
+ *
+ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc)
+ * @param fmt sprintf-style format string including newline
+ */
+void fuse_log(enum fuse_log_level level, const char *fmt, ...);
+
+#endif /* FUSE_LOG_H_ */
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
new file mode 100644
index 0000000000..de2e2e0c65
--- /dev/null
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -0,0 +1,2761 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * Implementation of (most of) the low-level FUSE API. The session loop
+ * functions are implemented in separate files.
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "qemu/osdep.h"
+#include "fuse_i.h"
+#include "standard-headers/linux/fuse.h"
+#include "fuse_misc.h"
+#include "fuse_opt.h"
+#include "fuse_virtio.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <glib.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <unistd.h>
+
+#define THREAD_POOL_SIZE 64
+
+#define OFFSET_MAX 0x7fffffffffffffffLL
+
+struct fuse_pollhandle {
+    uint64_t kh;
+    struct fuse_session *se;
+};
+
+static size_t pagesize;
+
+static __attribute__((constructor)) void fuse_ll_init_pagesize(void)
+{
+    pagesize = getpagesize(); /* cached once, set up by the constructor attr */
+}
+
+/* Translate a host struct stat into the FUSE protocol's struct fuse_attr */
+static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr)
+{
+    memset(attr, 0, sizeof(*attr));
+    attr->ino = stbuf->st_ino;
+    attr->mode = stbuf->st_mode;
+    attr->nlink = stbuf->st_nlink;
+    attr->uid = stbuf->st_uid;
+    attr->gid = stbuf->st_gid;
+    attr->rdev = stbuf->st_rdev;
+    attr->size = stbuf->st_size;
+    attr->blksize = stbuf->st_blksize;
+    attr->blocks = stbuf->st_blocks;
+    attr->atime = stbuf->st_atime;
+    attr->mtime = stbuf->st_mtime;
+    attr->ctime = stbuf->st_ctime;
+    attr->atimensec = ST_ATIM_NSEC(stbuf);
+    attr->mtimensec = ST_MTIM_NSEC(stbuf);
+    attr->ctimensec = ST_CTIM_NSEC(stbuf);
+}
+
+static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf)
+{
+    stbuf->st_mode = attr->mode; /* plain field copy; other fields untouched */
+    stbuf->st_uid = attr->uid;
+    stbuf->st_gid = attr->gid;
+    stbuf->st_size = attr->size;
+    stbuf->st_atime = attr->atime; /* whole-second part of each timestamp */
+    stbuf->st_mtime = attr->mtime;
+    stbuf->st_ctime = attr->ctime;
+    ST_ATIM_NSEC_SET(stbuf, attr->atimensec); /* presumably sub-second part */
+    ST_MTIM_NSEC_SET(stbuf, attr->mtimensec);
+    ST_CTIM_NSEC_SET(stbuf, attr->ctimensec);
+}
+
+/* Total byte length described by the first @count entries of @iov */
+static size_t iov_length(const struct iovec *iov, size_t count)
+{
+    size_t total = 0;
+
+    while (count-- > 0) {
+        total += (iov++)->iov_len;
+    }
+    return total;
+}
+
+/* Make @req a one-element circular list (both links point at itself) */
+static void list_init_req(struct fuse_req *req)
+{
+    req->prev = req->next = req;
+}
+
+/* Unlink @req from its circular list; @req's own links are left as-is */
+static void list_del_req(struct fuse_req *req)
+{
+    struct fuse_req *after = req->next;
+    req->prev->next = after;
+    after->prev = req->prev;
+}
+
+/* Link @req into the circular list immediately before @next */
+static void list_add_req(struct fuse_req *req, struct fuse_req *next)
+{
+    req->prev = next->prev;
+    req->next = next;
+    req->prev->next = req;
+    next->prev = req;
+}
+
+static void destroy_req(fuse_req_t req)
+{
+    pthread_mutex_destroy(&req->lock); /* initialised in fuse_ll_alloc_req() */
+    free(req);
+}
+
+/* Detach @req from its list and drop one reference, freeing it at zero */
+void fuse_free_req(fuse_req_t req)
+{
+    struct fuse_session *se = req->se;
+    int refs_left;
+    pthread_mutex_lock(&se->lock);
+    req->u.ni.func = NULL;
+    req->u.ni.data = NULL;
+    list_del_req(req);
+    req->ch = NULL;
+    refs_left = --req->ctr;
+    pthread_mutex_unlock(&se->lock);
+    if (refs_left == 0) {
+        destroy_req(req);
+    }
+}
+
+/* Allocate a zeroed request for @se holding one reference; NULL on OOM */
+static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se)
+{
+    struct fuse_req *req = calloc(1, sizeof(*req));
+
+    if (!req) {
+        fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n");
+        return NULL;
+    }
+
+    req->se = se;
+    req->ctr = 1;
+    list_init_req(req);
+    fuse_mutex_init(&req->lock);
+    return req;
+}
+
+/* Complete the reply header in iov[0], log it, then send it over virtio */
+static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
+                         struct iovec *iov, int count)
+{
+    struct fuse_out_header *out = iov[0].iov_base;
+
+    out->len = iov_length(iov, count);
+    if (out->unique == 0) {
+        fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error,
+                 out->len);
+    } else if (out->error) {
+        fuse_log(FUSE_LOG_DEBUG,
+                 "   unique: %llu, error: %i (%s), outsize: %u\n",
+                 (unsigned long long)out->unique, out->error,
+                 strerror(-out->error), out->len);
+    } else {
+        fuse_log(FUSE_LOG_DEBUG, "   unique: %llu, success, outsize: %u\n",
+                 (unsigned long long)out->unique, out->len);
+    }
+
+    if (fuse_lowlevel_is_virtio(se)) {
+        return virtio_send_msg(se, ch, iov, count);
+    }
+
+    abort(); /* virtio should have taken it before here */
+    return 0;
+}
+
+
+/* Send reply header + iov[1..]; iov[0] is overwritten. Does not free @req */
+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
+                               int count)
+{
+    struct fuse_out_header out = { .unique = req->unique };
+
+    if (error <= -1000 || error > 0) {
+        fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error);
+        error = -ERANGE;
+    }
+    /* Store the code only after validation so the clamp reaches the reply */
+    out.error = error;
+
+    iov[0].iov_base = &out;
+    iov[0].iov_len = sizeof(struct fuse_out_header);
+
+    return fuse_send_msg(req->se, req->ch, iov, count);
+}
+
+/* Send the reply via fuse_send_reply_iov_nofree(), then release @req */
+static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov,
+                          int count)
+{
+    int rc = fuse_send_reply_iov_nofree(req, error, iov, count);
+
+    fuse_free_req(req);
+    return rc;
+}
+
+/* Reply with @error plus an optional payload of @argsize bytes at @arg */
+static int send_reply(fuse_req_t req, int error, const void *arg,
+                      size_t argsize)
+{
+    struct iovec iov[2]; /* slot 0 is filled with the header when sending */
+    int count = argsize ? 2 : 1;
+
+    if (argsize) {
+        iov[1] = (struct iovec){ .iov_base = (void *)arg, .iov_len = argsize };
+    }
+    return send_reply_iov(req, error, iov, count);
+}
+
+/* Reply with success and @count caller-supplied iovecs; releases @req */
+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count)
+{
+    /* the extra leading slot receives the reply header on the send path */
+    int total = count + 1;
+    struct iovec *vec = malloc(total * sizeof(struct iovec));
+    int rc;
+
+    if (!vec) {
+        return fuse_reply_err(req, ENOMEM);
+    }
+
+    memcpy(&vec[1], iov, count * sizeof(struct iovec));
+    rc = send_reply_iov(req, 0, vec, total);
+    free(vec);
+
+    return rc;
+}
+
+
+/*
+ * `buf` is allowed to be empty so that the proper size may be
+ * allocated by the caller
+ */
+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
+                         const char *name, const struct stat *stbuf, off_t off)
+{
+    const size_t namelen = strlen(name);
+    const size_t entlen = FUSE_NAME_OFFSET + namelen;
+    const size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen);
+    struct fuse_dirent *dirent;
+
+    (void)req;
+
+    /* Sizing call (no buffer) or not enough room: only report the size */
+    if (!buf || entlen_padded > bufsize) {
+        return entlen_padded;
+    }
+
+    dirent = (struct fuse_dirent *)buf;
+    dirent->ino = stbuf->st_ino;
+    dirent->off = off;
+    dirent->namelen = namelen;
+    /* file-type bits of st_mode, shifted down to a small integer */
+    dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
+    memcpy(dirent->name, name, namelen);
+    /* the name is stored without a NUL; zero only the alignment padding */
+    memset(dirent->name + namelen, 0, entlen_padded - entlen);
+
+    return entlen_padded;
+}
+
+/*
+ * Translate a struct statvfs into the fuse_kstatfs layout used in replies.
+ * Every field of @kstatfs that is not copied from @stbuf ends up zero.
+ */
+static void convert_statfs(const struct statvfs *stbuf,
+                           struct fuse_kstatfs *kstatfs)
+{
+    memset(kstatfs, 0, sizeof(*kstatfs));
+
+    kstatfs->bsize = stbuf->f_bsize;
+    kstatfs->frsize = stbuf->f_frsize;
+    kstatfs->blocks = stbuf->f_blocks;
+    kstatfs->bfree = stbuf->f_bfree;
+    kstatfs->bavail = stbuf->f_bavail;
+    kstatfs->files = stbuf->f_files;
+    kstatfs->ffree = stbuf->f_ffree;
+    kstatfs->namelen = stbuf->f_namemax;
+}
+
+/* Send a successful (error == 0) reply carrying @argsize bytes from @arg. */
+static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize)
+{
+    const int no_error = 0;
+
+    return send_reply(req, no_error, arg, argsize);
+}
+
+/*
+ * Reply with an error and no payload.  @err is a positive errno value; it
+ * is negated before being handed to send_reply().
+ */
+int fuse_reply_err(fuse_req_t req, int err)
+{
+    const int negated = -err;
+
+    return send_reply(req, negated, NULL, 0);
+}
+
+/* Finish @req without sending anything: the request is only released. */
+void fuse_reply_none(fuse_req_t req)
+{
+    fuse_free_req(req);
+}
+
+/*
+ * Whole-second part of the timeout @t, clamped to [0, ULONG_MAX].
+ *
+ * (double)ULONG_MAX may round up to a value that no longer fits in an
+ * unsigned long (2^64 on LP64), so the upper bound has to be tested with
+ * ">=": converting an out-of-range double to an integer is undefined.
+ * NaN and anything not strictly positive yield 0.
+ */
+static unsigned long calc_timeout_sec(double t)
+{
+    if (!(t > 0.0)) {
+        /* negative, zero or NaN */
+        return 0;
+    }
+    if (t >= (double)ULONG_MAX) {
+        return ULONG_MAX;
+    }
+    return (unsigned long)t;
+}
+
+/*
+ * Nanosecond part of the timeout @t: what is left of @t after removing the
+ * whole seconds reported by calc_timeout_sec(), clamped to [0, 999999999].
+ */
+static unsigned int calc_timeout_nsec(double t)
+{
+    const double frac = t - (double)calc_timeout_sec(t);
+
+    if (frac < 0.0) {
+        return 0;
+    }
+    if (frac >= 0.999999999) {
+        return 999999999;
+    }
+    return (unsigned int)(frac * 1.0e9);
+}
+
+/*
+ * Populate @arg from @e: node id, generation, the entry/attribute timeouts
+ * split into seconds and nanoseconds, and the attributes via convert_stat().
+ * All other fields of @arg are zeroed.
+ */
+static void fill_entry(struct fuse_entry_out *arg,
+                       const struct fuse_entry_param *e)
+{
+    memset(arg, 0, sizeof(*arg));
+
+    arg->nodeid = e->ino;
+    arg->generation = e->generation;
+    arg->entry_valid = calc_timeout_sec(e->entry_timeout);
+    arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout);
+    arg->attr_valid = calc_timeout_sec(e->attr_timeout);
+    arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout);
+
+    convert_stat(&e->attr, &arg->attr);
+}
+
+/*
+ * Append one "readdirplus" entry (entry attributes followed by the
+ * directory entry itself) for @name to @buf.
+ *
+ * `buf` is allowed to be NULL (or too small for the entry): nothing is
+ * written in that case, and the padded entry size is still returned so
+ * that the proper size may be allocated by the caller.
+ */
+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
+                              const char *name,
+                              const struct fuse_entry_param *e, off_t off)
+{
+    const size_t namelen = strlen(name);
+    const size_t entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen;
+    const size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen);
+    struct fuse_direntplus *dp = (struct fuse_direntplus *)buf;
+    struct fuse_dirent *dirent;
+
+    (void)req;
+
+    if (buf == NULL || entlen_padded > bufsize) {
+        return entlen_padded;
+    }
+
+    memset(&dp->entry_out, 0, sizeof(dp->entry_out));
+    fill_entry(&dp->entry_out, e);
+
+    dirent = &dp->dirent;
+    dirent->ino = e->attr.st_ino;
+    dirent->off = off;
+    dirent->namelen = namelen;
+    dirent->type = (e->attr.st_mode & S_IFMT) >> 12;
+    memcpy(dirent->name, name, namelen);
+    /* Zero the alignment padding that follows the name */
+    memset(dirent->name + namelen, 0, entlen_padded - entlen);
+
+    return entlen_padded;
+}
+
+/*
+ * Copy the file handle from @f into @arg and OR the FOPEN_* flags that
+ * correspond to the set bits of @f into arg->open_flags.  Flags already
+ * present in @arg are preserved.
+ */
+static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f)
+{
+    uint32_t extra = 0;
+
+    extra |= f->direct_io ? FOPEN_DIRECT_IO : 0;
+    extra |= f->keep_cache ? FOPEN_KEEP_CACHE : 0;
+    extra |= f->cache_readdir ? FOPEN_CACHE_DIR : 0;
+    extra |= f->nonseekable ? FOPEN_NONSEEKABLE : 0;
+
+    arg->fh = f->fh;
+    arg->open_flags |= extra;
+}
+
+/* Reply with a fuse_entry_out built from the entry parameters in @e. */
+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
+{
+    struct fuse_entry_out out;
+
+    memset(&out, 0, sizeof(out));
+    fill_entry(&out, e);
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/*
+ * Reply to a create request: a fuse_entry_out built from @e immediately
+ * followed by a fuse_open_out built from @f.
+ *
+ * The two structures live in properly typed locals and are sent as two
+ * consecutive iovec elements.  The bytes sent are the same as when both
+ * are packed into one buffer, but nothing is accessed through a pointer
+ * cast from a char array, which guarantees neither the alignment nor the
+ * effective type the structures need.
+ */
+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
+                      const struct fuse_file_info *f)
+{
+    struct fuse_entry_out earg;
+    struct fuse_open_out oarg;
+    struct iovec iov[3]; /* iov[0] is reserved for the reply header */
+
+    memset(&earg, 0, sizeof(earg));
+    memset(&oarg, 0, sizeof(oarg));
+    fill_entry(&earg, e);
+    fill_open(&oarg, f);
+
+    iov[1].iov_base = &earg;
+    iov[1].iov_len = sizeof(earg);
+    iov[2].iov_base = &oarg;
+    iov[2].iov_len = sizeof(oarg);
+
+    return send_reply_iov(req, 0, iov, 3);
+}
+
+/*
+ * Reply with the attributes in @attr.  @attr_timeout is split into whole
+ * seconds and nanoseconds for the attr_valid fields of the reply.
+ */
+int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
+                    double attr_timeout)
+{
+    struct fuse_attr_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.attr_valid = calc_timeout_sec(attr_timeout);
+    out.attr_valid_nsec = calc_timeout_nsec(attr_timeout);
+    convert_stat(attr, &out.attr);
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/*
+ * Reply with the symlink target @linkname.  Only the characters of the
+ * string are sent; the terminating NUL is not part of the payload.
+ */
+int fuse_reply_readlink(fuse_req_t req, const char *linkname)
+{
+    const size_t len = strlen(linkname);
+
+    return send_reply_ok(req, linkname, len);
+}
+
+/* Reply with a fuse_open_out carrying the handle and open flags of @f. */
+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f)
+{
+    struct fuse_open_out out;
+
+    memset(&out, 0, sizeof(out));
+    fill_open(&out, f);
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/* Reply to a write request, reporting @count in the size field. */
+int fuse_reply_write(fuse_req_t req, size_t count)
+{
+    struct fuse_write_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.size = count;
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/* Reply with @size bytes of raw data taken from @buf. */
+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size)
+{
+    const void *payload = buf;
+
+    return send_reply_ok(req, payload, size);
+}
+
+/*
+ * Send @len bytes described by @buf after the @iov_count elements already
+ * set up in @iov.
+ *
+ * Two layouts are handled: a single memory buffer that has not been
+ * consumed yet (appended to @iov directly) and, on virtio sessions, a single
+ * seekable fd buffer (handed to virtio_send_data_iov()).  Anything else
+ * aborts.
+ */
+static int fuse_send_data_iov_fallback(struct fuse_session *se,
+                                       struct fuse_chan *ch, struct iovec *iov,
+                                       int iov_count, struct fuse_bufvec *buf,
+                                       size_t len)
+{
+    const int single = (buf->count == 1);
+
+    /* Optimize common case */
+    if (single && buf->idx == 0 && buf->off == 0 &&
+        !(buf->buf[0].flags & FUSE_BUF_IS_FD)) {
+        /*
+         * FIXME: also avoid memory copy if there are multiple buffers
+         * but none of them contain an fd
+         */
+        struct iovec *slot = &iov[iov_count];
+
+        slot->iov_base = buf->buf[0].mem;
+        slot->iov_len = len;
+        return fuse_send_msg(se, ch, iov, iov_count + 1);
+    }
+
+    if (fuse_lowlevel_is_virtio(se) && single &&
+        buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
+        return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
+    }
+
+    abort(); /* Will have taken vhost path */
+    return 0;
+}
+
+/*
+ * Send the data in @buf after the @iov_count elements already present in
+ * @iov.  The total payload length is taken from fuse_buf_size().
+ */
+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
+                              struct iovec *iov, int iov_count,
+                              struct fuse_bufvec *buf)
+{
+    const size_t total = fuse_buf_size(buf);
+
+    return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, total);
+}
+
+/*
+ * Reply with the data described by @bufv.
+ *
+ * A positive result from the send is treated as an errno value and is
+ * reported through fuse_reply_err().  Otherwise the request is released
+ * here and the (zero or negative) result is returned as is.
+ */
+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
+{
+    struct fuse_out_header hdr = {
+        .unique = req->unique,
+    };
+    struct iovec iov[2];
+    int res;
+
+    iov[0].iov_base = &hdr;
+    iov[0].iov_len = sizeof(struct fuse_out_header);
+
+    res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv);
+    if (res > 0) {
+        return fuse_reply_err(req, res);
+    }
+
+    fuse_free_req(req);
+    return res;
+}
+
+/* Reply with filesystem statistics converted from @stbuf. */
+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf)
+{
+    struct fuse_statfs_out out;
+
+    memset(&out, 0, sizeof(out));
+    convert_statfs(stbuf, &out.st);
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/* Reply to an xattr size query, reporting @count in the size field. */
+int fuse_reply_xattr(fuse_req_t req, size_t count)
+{
+    struct fuse_getxattr_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.size = count;
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/*
+ * Reply with the lock described by @lock.
+ *
+ * For anything other than F_UNLCK the byte range is converted from the
+ * (l_start, l_len) form to an inclusive (start, end) pair; l_len == 0 maps
+ * to an end of OFFSET_MAX.  For F_UNLCK the range fields stay zero.
+ */
+int fuse_reply_lock(fuse_req_t req, const struct flock *lock)
+{
+    struct fuse_lk_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.lk.type = lock->l_type;
+    out.lk.pid = lock->l_pid;
+
+    if (lock->l_type != F_UNLCK) {
+        out.lk.start = lock->l_start;
+        if (lock->l_len != 0) {
+            out.lk.end = lock->l_start + lock->l_len - 1;
+        } else {
+            out.lk.end = OFFSET_MAX;
+        }
+    }
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/* Reply to a bmap request, reporting @idx in the block field. */
+int fuse_reply_bmap(fuse_req_t req, uint64_t idx)
+{
+    struct fuse_bmap_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.block = idx;
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/*
+ * Convert @count struct iovec elements into a newly allocated array of
+ * struct fuse_ioctl_iovec (base pointers become integers).
+ *
+ * Returns NULL if the allocation fails; otherwise the caller owns the
+ * array and releases it with free().
+ */
+static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov,
+                                                      size_t count)
+{
+    struct fuse_ioctl_iovec *copy = malloc(sizeof(copy[0]) * count);
+    size_t n;
+
+    if (copy == NULL) {
+        return NULL;
+    }
+
+    for (n = 0; n < count; n++) {
+        const struct iovec *src = &iov[n];
+
+        copy[n].base = (uintptr_t)src->iov_base;
+        copy[n].len = src->iov_len;
+    }
+
+    return copy;
+}
+
+/*
+ * Reply to an ioctl with FUSE_IOCTL_RETRY, passing back the input and
+ * output iovec descriptions the request should be retried with.
+ *
+ * Replies EINVAL for a 64-bit ioctl on a host with 4-byte pointers and
+ * ENOMEM if a converted iovec array cannot be allocated.
+ */
+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
+                           size_t in_count, const struct iovec *out_iov,
+                           size_t out_count)
+{
+    struct fuse_ioctl_out arg;
+    struct fuse_ioctl_iovec *in_fiov = NULL;
+    struct fuse_ioctl_iovec *out_fiov = NULL;
+    struct iovec iov[4];
+    size_t niov = 1; /* iov[0] is reserved for the reply header */
+    int res;
+
+    memset(&arg, 0, sizeof(arg));
+    arg.flags |= FUSE_IOCTL_RETRY;
+    arg.in_iovs = in_count;
+    arg.out_iovs = out_count;
+    iov[niov].iov_base = &arg;
+    iov[niov].iov_len = sizeof(arg);
+    niov++;
+
+    /* Can't handle non-compat 64bit ioctls on 32bit */
+    if (sizeof(void *) == 4 && req->ioctl_64bit) {
+        /* Nothing has been allocated yet, so there is nothing to free */
+        return fuse_reply_err(req, EINVAL);
+    }
+
+    if (in_count) {
+        in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count);
+        if (!in_fiov) {
+            res = fuse_reply_err(req, ENOMEM);
+            goto done;
+        }
+
+        iov[niov].iov_base = (void *)in_fiov;
+        iov[niov].iov_len = sizeof(in_fiov[0]) * in_count;
+        niov++;
+    }
+
+    if (out_count) {
+        out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count);
+        if (!out_fiov) {
+            res = fuse_reply_err(req, ENOMEM);
+            goto done;
+        }
+
+        iov[niov].iov_base = (void *)out_fiov;
+        iov[niov].iov_len = sizeof(out_fiov[0]) * out_count;
+        niov++;
+    }
+
+    res = send_reply_iov(req, 0, iov, niov);
+
+done:
+    free(in_fiov);
+    free(out_fiov);
+
+    return res;
+}
+
+/*
+ * Reply to an ioctl with @result, optionally followed by @size bytes of
+ * output data from @buf.
+ */
+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size)
+{
+    struct fuse_ioctl_out out;
+    struct iovec iov[3];
+    size_t niov = 1; /* iov[0] is reserved for the reply header */
+
+    memset(&out, 0, sizeof(out));
+    out.result = result;
+
+    iov[niov].iov_base = &out;
+    iov[niov].iov_len = sizeof(out);
+    niov++;
+
+    if (size != 0) {
+        iov[niov].iov_base = (char *)buf;
+        iov[niov].iov_len = size;
+        niov++;
+    }
+
+    return send_reply_iov(req, 0, iov, niov);
+}
+
+/*
+ * Reply to an ioctl with @result followed by the @count data segments in
+ * @iov.
+ *
+ * The caller's vector is copied into a temporary one with two extra
+ * leading elements: one for the reply header and one for the
+ * fuse_ioctl_out.  Replies ENOMEM if that vector cannot be allocated.
+ */
+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
+                         int count)
+{
+    struct fuse_ioctl_out out;
+    struct iovec *vec = malloc((count + 2) * sizeof(struct iovec));
+    int ret;
+
+    if (!vec) {
+        return fuse_reply_err(req, ENOMEM);
+    }
+
+    memset(&out, 0, sizeof(out));
+    out.result = result;
+
+    vec[1].iov_base = &out;
+    vec[1].iov_len = sizeof(out);
+    memcpy(vec + 2, iov, count * sizeof(struct iovec));
+
+    ret = send_reply_iov(req, 0, vec, count + 2);
+    free(vec);
+
+    return ret;
+}
+
+/* Reply to a poll request, reporting @revents. */
+int fuse_reply_poll(fuse_req_t req, unsigned revents)
+{
+    struct fuse_poll_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.revents = revents;
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/* Reply to an lseek request, reporting @off in the offset field. */
+int fuse_reply_lseek(fuse_req_t req, off_t off)
+{
+    struct fuse_lseek_out out;
+
+    memset(&out, 0, sizeof(out));
+    out.offset = off;
+
+    return send_reply_ok(req, &out, sizeof(out));
+}
+
+/*
+ * Handle a lookup request: read the name from @iter and pass it to the
+ * lookup callback.  Replies EINVAL if no name can be read and ENOSYS if no
+ * callback is registered.
+ */
+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+
+    if (name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.lookup) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.lookup(req, nodeid, name);
+}
+
+/*
+ * Handle a forget request: pass the lookup count from the request to the
+ * forget callback.  Without a callback the request is simply released.
+ * Replies EINVAL if the argument cannot be read from @iter.
+ */
+static void do_forget(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    struct fuse_forget_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.forget) {
+        fuse_reply_none(req);
+        return;
+    }
+
+    req->se->op.forget(req, nodeid, arg->nlookup);
+}
+
+/*
+ * Handle a batch-forget request: a fuse_batch_forget_in followed by
+ * arg->count fuse_forget_data records.
+ *
+ * With a forget_multi callback the whole array is handed over in one call.
+ * Otherwise, if a forget callback exists, it is called once per record with
+ * a freshly allocated request, and the original request is released at the
+ * end.  Malformed requests are released without any reply.
+ */
+static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid,
+                            struct fuse_mbuf_iter *iter)
+{
+    struct fuse_batch_forget_in *arg;
+    struct fuse_forget_data *forgets;
+    size_t scount;
+
+    (void)nodeid;
+
+    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    if (arg == NULL) {
+        fuse_reply_none(req);
+        return;
+    }
+
+    /*
+     * Prevent integer overflow.  The compiler emits the following warning
+     * unless we use the scount local variable:
+     *
+     * error: comparison is always false due to limited range of data type
+     * [-Werror=type-limits]
+     *
+     * This may be true on 64-bit hosts but we need this check for 32-bit
+     * hosts.
+     */
+    scount = arg->count;
+    if (scount > SIZE_MAX / sizeof(forgets[0])) {
+        fuse_reply_none(req);
+        return;
+    }
+
+    forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0]));
+    if (forgets == NULL) {
+        fuse_reply_none(req);
+        return;
+    }
+
+    if (req->se->op.forget_multi) {
+        req->se->op.forget_multi(req, arg->count, forgets);
+        return;
+    }
+
+    if (req->se->op.forget) {
+        unsigned int idx;
+
+        for (idx = 0; idx < arg->count; idx++) {
+            struct fuse_req *sub = fuse_ll_alloc_req(req->se);
+
+            if (sub == NULL) {
+                break;
+            }
+
+            /* Each call gets its own request, cloned from the original */
+            sub->unique = req->unique;
+            sub->ctx = req->ctx;
+            sub->ch = NULL;
+
+            req->se->op.forget(sub, forgets[idx].ino, forgets[idx].nlookup);
+        }
+    }
+
+    fuse_reply_none(req);
+}
+
+/*
+ * Handle a getattr request.  A file info carrying the handle is passed to
+ * the callback only when the request has FUSE_GETATTR_FH set; otherwise
+ * the callback receives NULL.  Replies EINVAL if the argument cannot be
+ * read and ENOSYS if no callback is registered.
+ */
+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_getattr_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+    struct fuse_file_info *fip = NULL;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (arg->getattr_flags & FUSE_GETATTR_FH) {
+        memset(&fi, 0, sizeof(fi));
+        fi.fh = arg->fh;
+        fip = &fi;
+    }
+
+    if (!req->se->op.getattr) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.getattr(req, nodeid, fip);
+}
+
+/*
+ * Handle a setattr request.  The wire attributes are converted into a
+ * struct stat, FATTR_FH is turned into an optional file info, and the
+ * valid mask is restricted to the FUSE_SET_ATTR_* bits listed below before
+ * the callback runs.  Replies ENOSYS without a callback and EINVAL if the
+ * argument cannot be read.
+ */
+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_setattr_in *arg;
+    struct fuse_file_info fi_store;
+    struct fuse_file_info *fi = NULL;
+    struct stat stbuf;
+
+    if (!req->se->op.setattr) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&stbuf, 0, sizeof(stbuf));
+    convert_attr(arg, &stbuf);
+
+    if (arg->valid & FATTR_FH) {
+        arg->valid &= ~FATTR_FH;
+        memset(&fi_store, 0, sizeof(fi_store));
+        fi_store.fh = arg->fh;
+        fi = &fi_store;
+    }
+
+    arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID |
+                  FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE |
+                  FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME |
+                  FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW |
+                  FUSE_SET_ATTR_CTIME;
+
+    req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi);
+}
+
+/*
+ * Handle an access request: pass the mask from the request to the access
+ * callback.  Replies EINVAL if the argument cannot be read and ENOSYS if
+ * no callback is registered.
+ */
+static void do_access(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    struct fuse_access_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.access) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.access(req, nodeid, arg->mask);
+}
+
+/*
+ * Handle a readlink request.  Nothing is read from @iter; the node id is
+ * passed straight to the callback.  Replies ENOSYS without a callback.
+ */
+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid,
+                        struct fuse_mbuf_iter *iter)
+{
+    (void)iter;
+
+    if (!req->se->op.readlink) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.readlink(req, nodeid);
+}
+
+/*
+ * Handle a mknod request: a fuse_mknod_in followed by the new node's name.
+ * The umask from the request is stored in the request context before the
+ * callback runs.  Replies EINVAL if either part cannot be read and ENOSYS
+ * if no callback is registered.
+ */
+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_mknod_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+
+    if (arg == NULL || name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    req->ctx.umask = arg->umask;
+
+    if (!req->se->op.mknod) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev);
+}
+
+/*
+ * Handle a mkdir request: a fuse_mkdir_in followed by the directory name.
+ * The umask from the request is stored in the request context before the
+ * callback runs.  Replies EINVAL if either part cannot be read and ENOSYS
+ * if no callback is registered.
+ */
+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_mkdir_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+
+    if (arg == NULL || name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    req->ctx.umask = arg->umask;
+
+    if (!req->se->op.mkdir) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.mkdir(req, nodeid, name, arg->mode);
+}
+
+/*
+ * Handle an unlink request: read the name from @iter and pass it to the
+ * unlink callback.  Replies EINVAL if no name can be read and ENOSYS if no
+ * callback is registered.
+ */
+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+
+    if (name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.unlink) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.unlink(req, nodeid, name);
+}
+
+/*
+ * Handle an rmdir request: read the name from @iter and pass it to the
+ * rmdir callback.  Replies EINVAL if no name can be read and ENOSYS if no
+ * callback is registered.
+ */
+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+
+    if (name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.rmdir) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.rmdir(req, nodeid, name);
+}
+
+/*
+ * Handle a symlink request: two strings are read from @iter, first the
+ * name of the link and then its target.  Note that the callback takes the
+ * target before the parent node id and the name.  Replies EINVAL if either
+ * string cannot be read and ENOSYS if no callback is registered.
+ */
+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+    const char *linkname = fuse_mbuf_iter_advance_str(iter);
+
+    if (name == NULL || linkname == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.symlink) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.symlink(req, linkname, nodeid, name);
+}
+
+/*
+ * Handle a rename request: a fuse_rename_in followed by the old and the
+ * new name.  The rename callback is invoked with flags set to 0.  Replies
+ * EINVAL if any part cannot be read and ENOSYS without a callback.
+ */
+static void do_rename(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    struct fuse_rename_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    const char *oldname = fuse_mbuf_iter_advance_str(iter);
+    const char *newname = fuse_mbuf_iter_advance_str(iter);
+
+    if (arg == NULL || oldname == NULL || newname == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.rename) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0);
+}
+
+/*
+ * Handle a rename2 request: a fuse_rename2_in followed by the old and the
+ * new name.  Same callback as do_rename(), but the flags from the request
+ * are forwarded.  Replies EINVAL if any part cannot be read and ENOSYS
+ * without a callback.
+ */
+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_rename2_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    const char *oldname = fuse_mbuf_iter_advance_str(iter);
+    const char *newname = fuse_mbuf_iter_advance_str(iter);
+
+    if (arg == NULL || oldname == NULL || newname == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.rename) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.rename(req, nodeid, oldname, arg->newdir, newname,
+                       arg->flags);
+}
+
+/*
+ * Handle a link request: a fuse_link_in (holding the existing node id)
+ * followed by the new name, created under @nodeid.  Replies EINVAL if
+ * either part cannot be read and ENOSYS if no callback is registered.
+ */
+static void do_link(fuse_req_t req, fuse_ino_t nodeid,
+                    struct fuse_mbuf_iter *iter)
+{
+    struct fuse_link_in *arg;
+    const char *name;
+
+    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    name = fuse_mbuf_iter_advance_str(iter);
+    if (arg == NULL || name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.link) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.link(req, arg->oldnodeid, nodeid, name);
+}
+
+/*
+ * Handle a create request: a fuse_create_in followed by the file name.
+ * The open flags go into a fresh file info and the umask into the request
+ * context before the callback runs.  Replies ENOSYS without a callback
+ * (checked first) and EINVAL if either part cannot be read.
+ */
+static void do_create(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    struct fuse_create_in *arg;
+    struct fuse_file_info fi;
+    const char *name;
+
+    if (!req->se->op.create) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    name = fuse_mbuf_iter_advance_str(iter);
+    if (arg == NULL || name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.flags = arg->flags;
+
+    req->ctx.umask = arg->umask;
+
+    req->se->op.create(req, nodeid, name, arg->mode, &fi);
+}
+
+/*
+ * Handle an open request.  Without an open callback the request still
+ * succeeds: a reply is built from a file info that only carries the
+ * requested flags.  Replies EINVAL if the argument cannot be read.
+ */
+static void do_open(fuse_req_t req, fuse_ino_t nodeid,
+                    struct fuse_mbuf_iter *iter)
+{
+    struct fuse_open_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.flags = arg->flags;
+
+    if (!req->se->op.open) {
+        fuse_reply_open(req, &fi);
+        return;
+    }
+
+    req->se->op.open(req, nodeid, &fi);
+}
+
+/*
+ * Handle a read request: decode the fuse_read_in argument and hand the
+ * request to the read callback together with a file info carrying the
+ * handle, lock owner and flags.
+ *
+ * Replies ENOSYS when no read callback is registered and EINVAL when the
+ * argument cannot be read from @iter.
+ */
+static void do_read(fuse_req_t req, fuse_ino_t nodeid,
+                    struct fuse_mbuf_iter *iter)
+{
+    if (req->se->op.read) {
+        struct fuse_read_in *arg;
+        struct fuse_file_info fi;
+
+        arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+        if (!arg) {
+            /*
+             * fuse_mbuf_iter_advance() can fail (every sibling handler
+             * checks for it); reject the request instead of dereferencing
+             * a NULL pointer.
+             */
+            fuse_reply_err(req, EINVAL);
+            return;
+        }
+
+        memset(&fi, 0, sizeof(fi));
+        fi.fh = arg->fh;
+        fi.lock_owner = arg->lock_owner;
+        fi.flags = arg->flags;
+        req->se->op.read(req, nodeid, arg->size, arg->offset, &fi);
+    } else {
+        fuse_reply_err(req, ENOSYS);
+    }
+}
+
+/*
+ * Handle a write request whose data follows the fuse_write_in in the same
+ * buffer: arg->size bytes are taken from @iter as the payload.  Replies
+ * EINVAL if the argument or the payload cannot be read and ENOSYS if no
+ * write callback is registered.
+ */
+static void do_write(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_write_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+    const char *data;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    data = fuse_mbuf_iter_advance(iter, arg->size);
+    if (data == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+    fi.lock_owner = arg->lock_owner;
+    fi.flags = arg->flags;
+    fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
+    fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
+
+    if (!req->se->op.write) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.write(req, nodeid, data, arg->size, arg->offset, &fi);
+}
+
+/*
+ * Handle a write request whose data is described by the buffer vector
+ * @ibufv and pass it to the write_buf callback.
+ *
+ * With a single input buffer, headers and data share that buffer, so a
+ * temporary one-element vector pointing just past the fuse_write_in is
+ * used.  With several buffers, the first one holds only the headers and is
+ * skipped by setting its size to 0.  Replies EINVAL if the argument cannot
+ * be read and EIO if the data size does not match arg->size.
+ */
+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid,
+                         struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv)
+{
+    struct fuse_session *se = req->se;
+    struct fuse_bufvec tmpbufv = {
+        .buf[0] = ibufv->buf[0],
+        .count = 1,
+    };
+    struct fuse_bufvec *data = ibufv;
+    struct fuse_file_info fi;
+    struct fuse_write_in *arg;
+    const size_t arg_size = sizeof(*arg);
+
+    arg = fuse_mbuf_iter_advance(iter, arg_size);
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+    fi.flags = arg->flags;
+    fi.lock_owner = arg->lock_owner;
+    fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE);
+    fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
+
+    if (ibufv->count != 1) {
+        /*
+         *  Input bufv contains the headers in the first element
+         * and the data in the rest, we need to skip that first element
+         */
+        ibufv->buf[0].size = 0;
+    } else {
+        assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD));
+        tmpbufv.buf[0].mem = (char *)arg + arg_size;
+        tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size;
+        data = &tmpbufv;
+    }
+
+    if (fuse_buf_size(data) != arg->size) {
+        fuse_log(FUSE_LOG_ERR,
+                 "fuse: do_write_buf: buffer size doesn't match arg->size\n");
+        fuse_reply_err(req, EIO);
+        return;
+    }
+
+    se->op.write_buf(req, nodeid, data, arg->offset, &fi);
+}
+
+/*
+ * Handle a flush request: the callback gets a file info with the handle,
+ * the lock owner and the flush bit set.  Replies EINVAL if the argument
+ * cannot be read and ENOSYS if no callback is registered.
+ */
+static void do_flush(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_flush_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.flush = 1;
+    fi.fh = arg->fh;
+    fi.lock_owner = arg->lock_owner;
+
+    if (!req->se->op.flush) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.flush(req, nodeid, &fi);
+}
+
+/*
+ * Handle a release request.  The release flags of the request are mapped
+ * onto the flush and flock_release bits of the file info.  Without a
+ * release callback the request is answered with success (error 0).
+ * Replies EINVAL if the argument cannot be read.
+ */
+static void do_release(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_release_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+    fi.flags = arg->flags;
+    fi.lock_owner = arg->lock_owner;
+    fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
+    if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) {
+        fi.flock_release = 1;
+    }
+
+    if (!req->se->op.release) {
+        fuse_reply_err(req, 0);
+        return;
+    }
+
+    req->se->op.release(req, nodeid, &fi);
+}
+
+/*
+ * Handle an fsync request.  Bit 0 of fsync_flags is passed on as the
+ * datasync argument.  A file handle of (uint64_t)-1 makes the callback
+ * receive NULL instead of a file info.  Replies EINVAL if the argument
+ * cannot be read and ENOSYS if no callback is registered.
+ */
+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_fsync_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+    struct fuse_file_info *fip = &fi;
+    int datasync;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    datasync = arg->fsync_flags & 1;
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+
+    if (!req->se->op.fsync) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    if (fi.fh == (uint64_t)-1) {
+        fip = NULL;
+    }
+    req->se->op.fsync(req, nodeid, datasync, fip);
+}
+
+/*
+ * Handle an opendir request.  Without an opendir callback the request
+ * still succeeds: a reply is built from a file info that only carries the
+ * requested flags.  Replies EINVAL if the argument cannot be read.
+ */
+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_open_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.flags = arg->flags;
+
+    if (!req->se->op.opendir) {
+        fuse_reply_open(req, &fi);
+        return;
+    }
+
+    req->se->op.opendir(req, nodeid, &fi);
+}
+
+/*
+ * Handle a readdir request: size and offset from the fuse_read_in are
+ * forwarded to the readdir callback along with a file info carrying the
+ * handle.  Replies EINVAL if the argument cannot be read and ENOSYS if no
+ * callback is registered.
+ */
+static void do_readdir(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_read_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+
+    if (!req->se->op.readdir) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi);
+}
+
+/*
+ * Handle a readdirplus request: size and offset from the fuse_read_in are
+ * forwarded to the readdirplus callback along with a file info carrying
+ * the handle.  Replies EINVAL if the argument cannot be read and ENOSYS if
+ * no callback is registered.
+ */
+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid,
+                           struct fuse_mbuf_iter *iter)
+{
+    struct fuse_read_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+
+    if (!req->se->op.readdirplus) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi);
+}
+
+/*
+ * Handle a releasedir request.  Without a releasedir callback the request
+ * is answered with success (error 0).  Replies EINVAL if the argument
+ * cannot be read.
+ */
+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid,
+                          struct fuse_mbuf_iter *iter)
+{
+    struct fuse_release_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+    fi.flags = arg->flags;
+
+    if (!req->se->op.releasedir) {
+        fuse_reply_err(req, 0);
+        return;
+    }
+
+    req->se->op.releasedir(req, nodeid, &fi);
+}
+
+/*
+ * Handle an fsyncdir request.  Bit 0 of fsync_flags is passed on as the
+ * datasync argument; the callback always gets a file info with the handle.
+ * Replies EINVAL if the argument cannot be read and ENOSYS if no callback
+ * is registered.
+ */
+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid,
+                        struct fuse_mbuf_iter *iter)
+{
+    struct fuse_fsync_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+    int datasync;
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    datasync = arg->fsync_flags & 1;
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+
+    if (!req->se->op.fsyncdir) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.fsyncdir(req, nodeid, datasync, &fi);
+}
+
+/*
+ * Handle a statfs request.  Nothing is read from @iter.  Without a statfs
+ * callback a default reply is sent with f_namemax = 255 and f_bsize = 512
+ * and every other field zero.
+ */
+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    struct statvfs fallback = {
+        .f_namemax = 255,
+        .f_bsize = 512,
+    };
+
+    (void)iter;
+
+    if (req->se->op.statfs) {
+        req->se->op.statfs(req, nodeid);
+        return;
+    }
+
+    fuse_reply_statfs(req, &fallback);
+}
+
+/*
+ * Handle a setxattr request: a fuse_setxattr_in, the attribute name, then
+ * arg->size bytes of value.  Replies EINVAL if any part cannot be read and
+ * ENOSYS if no callback is registered.
+ */
+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid,
+                        struct fuse_mbuf_iter *iter)
+{
+    struct fuse_setxattr_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+    const char *value;
+
+    if (arg == NULL || name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    value = fuse_mbuf_iter_advance(iter, arg->size);
+    if (value == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.setxattr) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags);
+}
+
+/*
+ * Handle a getxattr request: a fuse_getxattr_in followed by the attribute
+ * name; the size from the request is forwarded to the callback.  Replies
+ * EINVAL if either part cannot be read and ENOSYS without a callback.
+ */
+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid,
+                        struct fuse_mbuf_iter *iter)
+{
+    struct fuse_getxattr_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    const char *name = fuse_mbuf_iter_advance_str(iter);
+
+    if (arg == NULL || name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.getxattr) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.getxattr(req, nodeid, name, arg->size);
+}
+
+/*
+ * Handle a listxattr request: the size from the fuse_getxattr_in argument
+ * is forwarded to the callback.  Replies EINVAL if the argument cannot be
+ * read and ENOSYS if no callback is registered.
+ */
+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid,
+                         struct fuse_mbuf_iter *iter)
+{
+    struct fuse_getxattr_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+
+    if (arg == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    if (!req->se->op.listxattr) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    req->se->op.listxattr(req, nodeid, arg->size);
+}
+
+/* FUSE_REMOVEXATTR: only an attribute name is read from the payload. */
+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid,
+                           struct fuse_mbuf_iter *iter)
+{
+    const char *attr_name = fuse_mbuf_iter_advance_str(iter);
+
+    if (attr_name == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.removexattr == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+    req->se->op.removexattr(req, nodeid, attr_name);
+}
+
+/* Translate an inclusive [start, end] FUSE range into a struct flock. */
+static void convert_fuse_file_lock(struct fuse_file_lock *fl,
+                                   struct flock *flock)
+{
+    memset(flock, 0, sizeof(*flock));
+    flock->l_pid = fl->pid;
+    flock->l_type = fl->type;
+    flock->l_whence = SEEK_SET;
+    flock->l_start = fl->start;
+    /* OFFSET_MAX as end maps to l_len 0; the memset above already set it. */
+    if (fl->end != OFFSET_MAX) {
+        flock->l_len = fl->end - fl->start + 1;
+    }
+}
+
+/* FUSE_GETLK: build file info and a struct flock, then query the handler. */
+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_lk_in *lk_in = fuse_mbuf_iter_advance(iter, sizeof(*lk_in));
+    struct fuse_file_info fi;
+    struct flock posix_lock;
+
+    if (lk_in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.getlk == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = lk_in->fh;
+    fi.lock_owner = lk_in->owner;
+    convert_fuse_file_lock(&lk_in->lk, &posix_lock);
+
+    req->se->op.getlk(req, nodeid, &fi, &posix_lock);
+}
+
+/* Common body of FUSE_SETLK (sleep == 0) and FUSE_SETLKW (sleep != 0). */
+static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid,
+                            struct fuse_mbuf_iter *iter, int sleep)
+{
+    struct fuse_lk_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
+    struct fuse_file_info fi;
+    struct flock flock;
+
+    if (!arg) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = arg->fh;
+    fi.lock_owner = arg->owner;
+
+    if (arg->lk_flags & FUSE_LK_FLOCK) {
+        int op = sleep ? 0 : LOCK_NB;
+
+        if (!req->se->op.flock) {
+            fuse_reply_err(req, ENOSYS);
+            return;
+        }
+        switch (arg->lk.type) {
+        case F_RDLCK:
+            op |= LOCK_SH;
+            break;
+        case F_WRLCK:
+            op |= LOCK_EX;
+            break;
+        case F_UNLCK:
+            op |= LOCK_UN;
+            break;
+        default:
+            /* The type comes from the request; do not forward a bogus op. */
+            fuse_reply_err(req, EINVAL);
+            return;
+        }
+        req->se->op.flock(req, nodeid, &fi, op);
+    } else {
+        convert_fuse_file_lock(&arg->lk, &flock);
+        if (req->se->op.setlk) {
+            req->se->op.setlk(req, nodeid, &fi, &flock, sleep);
+        } else {
+            fuse_reply_err(req, ENOSYS);
+        }
+    }
+}
+
+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    do_setlk_common(req, nodeid, iter, 0); /* sleep=0: flock ops add LOCK_NB */
+}
+
+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid,
+                      struct fuse_mbuf_iter *iter)
+{
+    do_setlk_common(req, nodeid, iter, 1); /* sleep=1: LOCK_NB is not added */
+}
+
+static int find_interrupted(struct fuse_session *se, struct fuse_req *req)
+{
+    struct fuse_req *curr;
+    /* Called with se->lock held; it is dropped and re-taken in the loop. */
+    for (curr = se->list.next; curr != &se->list; curr = curr->next) {
+        if (curr->unique == req->u.i.unique) {
+            fuse_interrupt_func_t func;
+            void *data;
+            /* Raise ctr before se->lock is released just below. */
+            curr->ctr++;
+            pthread_mutex_unlock(&se->lock);
+
+            /* Lock order here: curr->lock first, then se->lock. */
+            pthread_mutex_lock(&curr->lock);
+            pthread_mutex_lock(&se->lock);
+            curr->interrupted = 1;
+            func = curr->u.ni.func;
+            data = curr->u.ni.data;
+            pthread_mutex_unlock(&se->lock);
+            if (func) {
+                func(curr, data);
+            }
+            pthread_mutex_unlock(&curr->lock);
+            /* Re-take se->lock, then drop the count taken above. */
+            pthread_mutex_lock(&se->lock);
+            curr->ctr--;
+            if (!curr->ctr) {
+                destroy_req(curr);
+            }
+            /* 1: a request on se->list matched; se->lock is held again. */
+            return 1;
+        }
+    }
+    for (curr = se->interrupts.next; curr != &se->interrupts;
+         curr = curr->next) {
+        if (curr->u.i.unique == req->u.i.unique) {
+            return 1; /* an interrupt for this id is already queued */
+        }
+    }
+    return 0; /* no match on either list */
+}
+
+/* FUSE_INTERRUPT: destroy req on a match, else park it on se->interrupts. */
+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid,
+                         struct fuse_mbuf_iter *iter)
+{
+    struct fuse_session *se = req->se;
+    struct fuse_interrupt_in *in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+
+    (void)nodeid;
+
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n",
+             (unsigned long long)in->unique);
+    req->u.i.unique = in->unique;
+
+    /* find_interrupted() is entered and left with se->lock held. */
+    pthread_mutex_lock(&se->lock);
+    if (!find_interrupted(se, req)) {
+        list_add_req(req, &se->interrupts);
+    } else {
+        destroy_req(req);
+    }
+    pthread_mutex_unlock(&se->lock);
+}
+
+/* Match req against queued interrupts, else unlink and return the first. */
+static struct fuse_req *check_interrupt(struct fuse_session *se,
+                                        struct fuse_req *req)
+{
+    struct fuse_req *head = &se->interrupts;
+    struct fuse_req *intr;
+
+    for (intr = head->next; intr != head; intr = intr->next) {
+        if (intr->u.i.unique == req->unique) {
+            req->interrupted = 1;
+            list_del_req(intr);
+            free(intr);
+            return NULL;
+        }
+    }
+    intr = head->next;
+    if (intr == head) {
+        return NULL;
+    }
+    list_del_req(intr);
+    list_init_req(intr);
+    return intr;
+}
+
+/* FUSE_BMAP: pass the block size and block number to the bmap handler. */
+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid,
+                    struct fuse_mbuf_iter *iter)
+{
+    struct fuse_bmap_in *in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.bmap == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+    req->se->op.bmap(req, nodeid, in->blocksize, in->block);
+}
+
+/* FUSE_IOCTL: check the header, fetch any input data, call the handler. */
+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_ioctl_in *in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+    struct fuse_file_info fi;
+    void *payload = NULL;
+    unsigned int flags;
+
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    flags = in->flags;
+    /* Directory ioctls need FUSE_CAP_IOCTL_DIR to be set in conn.want. */
+    if ((flags & FUSE_IOCTL_DIR) &&
+        !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) {
+        fuse_reply_err(req, ENOTTY);
+        return;
+    }
+    if (in->in_size != 0) {
+        payload = fuse_mbuf_iter_advance(iter, in->in_size);
+        if (payload == NULL) {
+            fuse_reply_err(req, EINVAL);
+            return;
+        }
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = in->fh;
+    /* Only a build with 4-byte pointers ever sets ioctl_64bit. */
+    if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) {
+        req->ioctl_64bit = 1;
+    }
+    if (req->se->op.ioctl == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+    req->se->op.ioctl(req, nodeid, in->cmd, (void *)(uintptr_t)in->arg, &fi,
+                      flags, payload, in->in_size, in->out_size);
+}
+
+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph)
+{
+    free(ph); /* free(NULL) is a no-op, so a NULL ph is harmless */
+}
+
+/* FUSE_POLL: optionally allocate a notify handle, then call the handler. */
+static void do_poll(fuse_req_t req, fuse_ino_t nodeid,
+                    struct fuse_mbuf_iter *iter)
+{
+    struct fuse_poll_in *in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+    struct fuse_file_info fi;
+    struct fuse_pollhandle *ph = NULL;
+
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.poll == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = in->fh;
+    fi.poll_events = in->events;
+
+    /* A handle is only created when the request asks for notification. */
+    if (in->flags & FUSE_POLL_SCHEDULE_NOTIFY) {
+        ph = malloc(sizeof(*ph));
+        if (ph == NULL) {
+            fuse_reply_err(req, ENOMEM);
+            return;
+        }
+        ph->kh = in->kh;
+        ph->se = req->se;
+    }
+    /* ph (possibly NULL) is handed to the handler; nothing frees it here. */
+    req->se->op.poll(req, nodeid, &fi, ph);
+}
+
+/* FUSE_FALLOCATE: forward mode, offset and length to the handler. */
+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid,
+                         struct fuse_mbuf_iter *iter)
+{
+    struct fuse_fallocate_in *in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+    struct fuse_file_info fi;
+
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.fallocate == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = in->fh;
+
+    req->se->op.fallocate(req, nodeid, in->mode, in->offset, in->length,
+                          &fi);
+}
+
+/* FUSE_COPY_FILE_RANGE: wrap both file handles and call the handler. */
+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in,
+                               struct fuse_mbuf_iter *iter)
+{
+    struct fuse_copy_file_range_in *in;
+    struct fuse_file_info src, dst;
+
+    in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.copy_file_range == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    /* Only the fh members are filled in; everything else stays zero. */
+    memset(&src, 0, sizeof(src));
+    src.fh = in->fh_in;
+    memset(&dst, 0, sizeof(dst));
+    dst.fh = in->fh_out;
+
+    req->se->op.copy_file_range(req, nodeid_in, in->off_in, &src,
+                                in->nodeid_out, in->off_out, &dst,
+                                in->len, in->flags);
+}
+
+/* FUSE_LSEEK: forward offset and whence for the given file handle. */
+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+{
+    struct fuse_lseek_in *in = fuse_mbuf_iter_advance(iter, sizeof(*in));
+    struct fuse_file_info fi;
+
+    if (in == NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+    if (req->se->op.lseek == NULL) {
+        fuse_reply_err(req, ENOSYS);
+        return;
+    }
+
+    memset(&fi, 0, sizeof(fi));
+    fi.fh = in->fh;
+    req->se->op.lseek(req, nodeid, in->offset, in->whence, &fi);
+}
+
+static void do_init(fuse_req_t req, fuse_ino_t nodeid,
+                    struct fuse_mbuf_iter *iter)
+{
+    size_t compat_size = offsetof(struct fuse_init_in, max_readahead);
+    struct fuse_init_in *arg;
+    struct fuse_init_out outarg;
+    struct fuse_session *se = req->se;
+    size_t bufsize = se->bufsize;
+    size_t outargsize = sizeof(outarg);
+    /* Negotiates version and capabilities, then replies with outarg. */
+    (void)nodeid;
+
+    /* First consume the old fields... */
+    arg = fuse_mbuf_iter_advance(iter, compat_size);
+    if (!arg) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    /* ...and now consume the new fields. */
+    if (arg->major == 7 && arg->minor >= 6) {
+        if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) {
+            fuse_reply_err(req, EINVAL);
+            return;
+        }
+    }
+
+    fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
+    if (arg->major == 7 && arg->minor >= 6) {
+        fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags);
+        fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead);
+    }
+    se->conn.proto_major = arg->major;
+    se->conn.proto_minor = arg->minor;
+    se->conn.capable = 0;
+    se->conn.want = 0;
+
+    memset(&outarg, 0, sizeof(outarg));
+    outarg.major = FUSE_KERNEL_VERSION;
+    outarg.minor = FUSE_KERNEL_MINOR_VERSION;
+    /* Peers older than 7.31 are refused with EPROTO. */
+    if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) {
+        fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n",
+                 arg->major, arg->minor);
+        fuse_reply_err(req, EPROTO);
+        return;
+    }
+
+    if (arg->major > 7) {
+        /* Wait for a second INIT request with a 7.X version */
+        send_reply_ok(req, &outarg, sizeof(outarg));
+        return;
+    }
+    /* Translate the peer's INIT flags into conn.capable bits. */
+    if (arg->max_readahead < se->conn.max_readahead) {
+        se->conn.max_readahead = arg->max_readahead;
+    }
+    if (arg->flags & FUSE_ASYNC_READ) {
+        se->conn.capable |= FUSE_CAP_ASYNC_READ;
+    }
+    if (arg->flags & FUSE_POSIX_LOCKS) {
+        se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
+    }
+    if (arg->flags & FUSE_ATOMIC_O_TRUNC) {
+        se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
+    }
+    if (arg->flags & FUSE_EXPORT_SUPPORT) {
+        se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
+    }
+    if (arg->flags & FUSE_DONT_MASK) {
+        se->conn.capable |= FUSE_CAP_DONT_MASK;
+    }
+    if (arg->flags & FUSE_FLOCK_LOCKS) {
+        se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
+    }
+    if (arg->flags & FUSE_AUTO_INVAL_DATA) {
+        se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
+    }
+    if (arg->flags & FUSE_DO_READDIRPLUS) {
+        se->conn.capable |= FUSE_CAP_READDIRPLUS;
+    }
+    if (arg->flags & FUSE_READDIRPLUS_AUTO) {
+        se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
+    }
+    if (arg->flags & FUSE_ASYNC_DIO) {
+        se->conn.capable |= FUSE_CAP_ASYNC_DIO;
+    }
+    if (arg->flags & FUSE_WRITEBACK_CACHE) {
+        se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
+    }
+    if (arg->flags & FUSE_NO_OPEN_SUPPORT) {
+        se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
+    }
+    if (arg->flags & FUSE_PARALLEL_DIROPS) {
+        se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
+    }
+    if (arg->flags & FUSE_POSIX_ACL) {
+        se->conn.capable |= FUSE_CAP_POSIX_ACL;
+    }
+    if (arg->flags & FUSE_HANDLE_KILLPRIV) {
+        se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
+    }
+    if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) {
+        se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
+    }
+    if (!(arg->flags & FUSE_MAX_PAGES)) {
+        size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
+                             FUSE_BUFFER_HEADER_SIZE;
+        if (bufsize > max_bufsize) {
+            bufsize = max_bufsize;
+        }
+    }
+#ifdef HAVE_SPLICE
+#ifdef HAVE_VMSPLICE
+    se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
+#endif
+    se->conn.capable |= FUSE_CAP_SPLICE_READ;
+#endif
+    se->conn.capable |= FUSE_CAP_IOCTL_DIR;
+
+    /*
+     * Default settings for modern filesystems.
+     * LL_SET_DEFAULT(cond, cap) wants cap only if cond and conn.capable agree.
+     * Most of these capabilities were disabled by default in
+     * libfuse2 for backwards compatibility reasons. In libfuse3,
+     * we can finally enable them by default (as long as they're
+     * supported by the kernel).
+     */
+#define LL_SET_DEFAULT(cond, cap)             \
+    if ((cond) && (se->conn.capable & (cap))) \
+        se->conn.want |= (cap)
+    LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ);
+    LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS);
+    LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA);
+    LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV);
+    LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO);
+    LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR);
+    LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC);
+    LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ);
+    LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS);
+    LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS);
+    LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS);
+    LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir,
+                   FUSE_CAP_READDIRPLUS_AUTO);
+    se->conn.time_gran = 1;
+
+    if (bufsize < FUSE_MIN_READ_BUFFER) {
+        fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n",
+                 bufsize);
+        bufsize = FUSE_MIN_READ_BUFFER;
+    }
+    se->bufsize = bufsize;
+
+    if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) {
+        se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE;
+    }
+    /* op.init receives se->conn; conn.want is validated right after. */
+    se->got_init = 1;
+    se->got_destroy = 0;
+    if (se->op.init) {
+        se->op.init(se->userdata, &se->conn);
+    }
+
+    if (se->conn.want & (~se->conn.capable)) {
+        fuse_log(FUSE_LOG_ERR,
+                 "fuse: error: filesystem requested capabilities "
+                 "0x%x that are not supported by kernel, aborting.\n",
+                 se->conn.want & (~se->conn.capable));
+        fuse_reply_err(req, EPROTO);
+        se->error = -EPROTO;
+        fuse_session_exit(se);
+        return;
+    }
+    /* NOTE(review): a max_write of 0 would wrap the max_pages math below. */
+    if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) {
+        se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
+    }
+    if (arg->flags & FUSE_MAX_PAGES) {
+        outarg.flags |= FUSE_MAX_PAGES;
+        outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1;
+    }
+
+    /*
+     * Always enable big writes, this is superseded
+     * by the max_write option
+     */
+    outarg.flags |= FUSE_BIG_WRITES;
+
+    if (se->conn.want & FUSE_CAP_ASYNC_READ) {
+        outarg.flags |= FUSE_ASYNC_READ;
+    }
+    if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) {
+        outarg.flags |= FUSE_PARALLEL_DIROPS;
+    }
+    if (se->conn.want & FUSE_CAP_POSIX_LOCKS) {
+        outarg.flags |= FUSE_POSIX_LOCKS;
+    }
+    if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) {
+        outarg.flags |= FUSE_ATOMIC_O_TRUNC;
+    }
+    if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) {
+        outarg.flags |= FUSE_EXPORT_SUPPORT;
+    }
+    if (se->conn.want & FUSE_CAP_DONT_MASK) {
+        outarg.flags |= FUSE_DONT_MASK;
+    }
+    if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) {
+        outarg.flags |= FUSE_FLOCK_LOCKS;
+    }
+    if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) {
+        outarg.flags |= FUSE_AUTO_INVAL_DATA;
+    }
+    if (se->conn.want & FUSE_CAP_READDIRPLUS) {
+        outarg.flags |= FUSE_DO_READDIRPLUS;
+    }
+    if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) {
+        outarg.flags |= FUSE_READDIRPLUS_AUTO;
+    }
+    if (se->conn.want & FUSE_CAP_ASYNC_DIO) {
+        outarg.flags |= FUSE_ASYNC_DIO;
+    }
+    if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) {
+        outarg.flags |= FUSE_WRITEBACK_CACHE;
+    }
+    if (se->conn.want & FUSE_CAP_POSIX_ACL) {
+        outarg.flags |= FUSE_POSIX_ACL;
+    }
+    outarg.max_readahead = se->conn.max_readahead;
+    outarg.max_write = se->conn.max_write;
+    if (se->conn.max_background >= (1 << 16)) {
+        se->conn.max_background = (1 << 16) - 1;
+    }
+    if (se->conn.congestion_threshold > se->conn.max_background) {
+        se->conn.congestion_threshold = se->conn.max_background;
+    }
+    if (!se->conn.congestion_threshold) {
+        se->conn.congestion_threshold = se->conn.max_background * 3 / 4;
+    }
+
+    outarg.max_background = se->conn.max_background;
+    outarg.congestion_threshold = se->conn.congestion_threshold;
+    outarg.time_gran = se->conn.time_gran;
+
+    fuse_log(FUSE_LOG_DEBUG, "   INIT: %u.%u\n", outarg.major, outarg.minor);
+    fuse_log(FUSE_LOG_DEBUG, "   flags=0x%08x\n", outarg.flags);
+    fuse_log(FUSE_LOG_DEBUG, "   max_readahead=0x%08x\n", outarg.max_readahead);
+    fuse_log(FUSE_LOG_DEBUG, "   max_write=0x%08x\n", outarg.max_write);
+    fuse_log(FUSE_LOG_DEBUG, "   max_background=%i\n", outarg.max_background);
+    fuse_log(FUSE_LOG_DEBUG, "   congestion_threshold=%i\n",
+             outarg.congestion_threshold);
+    fuse_log(FUSE_LOG_DEBUG, "   time_gran=%u\n", outarg.time_gran);
+
+    send_reply_ok(req, &outarg, outargsize);
+}
+
+/* FUSE_DESTROY: flip the session flags, run op.destroy, acknowledge. */
+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
+                       struct fuse_mbuf_iter *iter)
+{
+    struct fuse_session *session = req->se;
+
+    (void)nodeid;
+    (void)iter;
+
+    session->got_destroy = 1;
+    session->got_init = 0;
+    if (session->op.destroy != NULL) {
+        session->op.destroy(session->userdata);
+    }
+    send_reply_ok(req, NULL, 0);
+}
+
+/* Fill iov[0] with a notification header and send all count entries. */
+static int send_notify_iov(struct fuse_session *se, int notify_code,
+                           struct iovec *iov, int count)
+{
+    struct fuse_out_header hdr = { .error = notify_code };
+
+    if (!se->got_init) {
+        return -ENOTCONN;
+    }
+
+    /* iov[0] is reserved for the header; callers fill the later entries. */
+    iov[0].iov_base = &hdr;
+    iov[0].iov_len = sizeof(hdr);
+
+    return fuse_send_msg(se, NULL, iov, count);
+}
+
+/* Send FUSE_NOTIFY_POLL for ph->kh; a NULL ph is a successful no-op. */
+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph)
+{
+    struct fuse_notify_poll_wakeup_out wakeup;
+    struct iovec iov[2];
+
+    if (ph == NULL) {
+        return 0;
+    }
+
+    wakeup = (struct fuse_notify_poll_wakeup_out){ .kh = ph->kh };
+    iov[1].iov_base = &wakeup;
+    iov[1].iov_len = sizeof(wakeup);
+
+    return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2);
+}
+
+/* Send FUSE_NOTIFY_INVAL_INODE for ino; -EINVAL when se is NULL. */
+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
+                                     off_t off, off_t len)
+{
+    struct fuse_notify_inval_inode_out inval = {
+        .ino = ino,
+        .off = off,
+        .len = len,
+    };
+    struct iovec iov[2] = {
+        [1] = { .iov_base = &inval, .iov_len = sizeof(inval) },
+    };
+
+    if (se == NULL) {
+        return -EINVAL;
+    }
+
+    return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2);
+}
+
+/* Send FUSE_NOTIFY_INVAL_ENTRY for name in parent; -EINVAL if se is NULL. */
+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
+                                     const char *name, size_t namelen)
+{
+    struct fuse_notify_inval_entry_out msg = {
+        .parent = parent,
+        .namelen = namelen,
+    };
+    struct iovec iov[3] = {
+        /* namelen + 1: one byte past the name (presumably its NUL) is sent. */
+        [1] = { .iov_base = &msg, .iov_len = sizeof(msg) },
+        [2] = { .iov_base = (void *)name, .iov_len = namelen + 1 },
+    };
+
+    if (se == NULL) {
+        return -EINVAL;
+    }
+
+    return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3);
+}
+
+/* Send FUSE_NOTIFY_DELETE for child `name` in parent; -EINVAL if !se. */
+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
+                                fuse_ino_t child, const char *name,
+                                size_t namelen)
+{
+    struct fuse_notify_delete_out msg = {
+        .parent = parent,
+        .child = child,
+        .namelen = namelen,
+    };
+    struct iovec iov[3] = {
+        /* iov[0] is left for send_notify_iov() to fill in. */
+        [1] = { .iov_base = &msg, .iov_len = sizeof(msg) },
+        [2] = { .iov_base = (void *)name, .iov_len = namelen + 1 },
+    };
+
+    if (se == NULL) {
+        return -EINVAL;
+    }
+
+    return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3);
+}
+
+/*
+ * Send a FUSE_NOTIFY_STORE message for ino at offset, with bufv passed on
+ * as the payload.  The header is built here rather than through
+ * send_notify_iov(), and se->got_init is not consulted.
+ * Returns -EINVAL for a NULL session, otherwise the fuse_send_data_iov()
+ * result with any positive value negated.
+ */
+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
+                               off_t offset, struct fuse_bufvec *bufv)
+{
+    struct fuse_out_header hdr = { .error = FUSE_NOTIFY_STORE };
+    struct fuse_notify_store_out store = {
+        .nodeid = ino,
+        .offset = offset,
+        .size = fuse_buf_size(bufv),
+    };
+    struct iovec iov[3] = {
+        { .iov_base = &hdr, .iov_len = sizeof(hdr) },
+        { .iov_base = &store, .iov_len = sizeof(store) },
+    };
+    int res;
+
+    if (se == NULL) {
+        return -EINVAL;
+    }
+
+    res = fuse_send_data_iov(se, NULL, iov, 2, bufv);
+    /* Only two iov entries are passed; bufv carries the data. */
+    return res > 0 ? -res : res;
+}
+
+void *fuse_req_userdata(fuse_req_t req)
+{
+    return req->se->userdata; /* the pointer stored on req's session */
+}
+
+const struct fuse_ctx *fuse_req_ctx(fuse_req_t req)
+{
+    return &req->ctx; /* points into req, so it is only valid while req is */
+}
+
+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
+                             void *data)
+{
+    pthread_mutex_lock(&req->lock);
+    pthread_mutex_lock(&req->se->lock); /* same order as find_interrupted() */
+    req->u.ni.func = func;
+    req->u.ni.data = data;
+    pthread_mutex_unlock(&req->se->lock);
+    if (req->interrupted && func) {
+        func(req, data); /* already interrupted: invoke the callback now */
+    }
+    pthread_mutex_unlock(&req->lock);
+}
+
+int fuse_req_interrupted(fuse_req_t req)
+{
+    int flag;
+
+    /* The flag is sampled while holding the session lock. */
+    pthread_mutex_lock(&req->se->lock);
+    flag = req->interrupted;
+    pthread_mutex_unlock(&req->se->lock);
+    return flag;
+}
+
+static struct {
+    void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *);
+    const char *name; /* returned by opname() for debug logging */
+} fuse_ll_ops[] = { /* indexed by FUSE opcode */
+    [FUSE_LOOKUP] = { do_lookup, "LOOKUP" },
+    [FUSE_FORGET] = { do_forget, "FORGET" },
+    [FUSE_GETATTR] = { do_getattr, "GETATTR" },
+    [FUSE_SETATTR] = { do_setattr, "SETATTR" },
+    [FUSE_READLINK] = { do_readlink, "READLINK" },
+    [FUSE_SYMLINK] = { do_symlink, "SYMLINK" },
+    [FUSE_MKNOD] = { do_mknod, "MKNOD" },
+    [FUSE_MKDIR] = { do_mkdir, "MKDIR" },
+    [FUSE_UNLINK] = { do_unlink, "UNLINK" },
+    [FUSE_RMDIR] = { do_rmdir, "RMDIR" },
+    [FUSE_RENAME] = { do_rename, "RENAME" },
+    [FUSE_LINK] = { do_link, "LINK" },
+    [FUSE_OPEN] = { do_open, "OPEN" },
+    [FUSE_READ] = { do_read, "READ" },
+    [FUSE_WRITE] = { do_write, "WRITE" },
+    [FUSE_STATFS] = { do_statfs, "STATFS" },
+    [FUSE_RELEASE] = { do_release, "RELEASE" },
+    [FUSE_FSYNC] = { do_fsync, "FSYNC" },
+    [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" },
+    [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" },
+    [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" },
+    [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" },
+    [FUSE_FLUSH] = { do_flush, "FLUSH" },
+    [FUSE_INIT] = { do_init, "INIT" },
+    [FUSE_OPENDIR] = { do_opendir, "OPENDIR" },
+    [FUSE_READDIR] = { do_readdir, "READDIR" },
+    [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" },
+    [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" },
+    [FUSE_GETLK] = { do_getlk, "GETLK" },
+    [FUSE_SETLK] = { do_setlk, "SETLK" },
+    [FUSE_SETLKW] = { do_setlkw, "SETLKW" },
+    [FUSE_ACCESS] = { do_access, "ACCESS" },
+    [FUSE_CREATE] = { do_create, "CREATE" },
+    [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" },
+    [FUSE_BMAP] = { do_bmap, "BMAP" },
+    [FUSE_IOCTL] = { do_ioctl, "IOCTL" },
+    [FUSE_POLL] = { do_poll, "POLL" },
+    [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" },
+    [FUSE_DESTROY] = { do_destroy, "DESTROY" },
+    [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, /* named, no handler */
+    [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" },
+    [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" },
+    [FUSE_RENAME2] = { do_rename2, "RENAME2" },
+    [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
+    [FUSE_LSEEK] = { do_lseek, "LSEEK" },
+};
+/* Slots not named above are zero-filled, so func and name are NULL. */
+#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
+
+static const char *opname(enum fuse_opcode opcode)
+{
+    /* Out-of-range and unnamed table slots are both shown as "???". */
+    if (opcode < FUSE_MAXOP && fuse_ll_ops[opcode].name) {
+        return fuse_ll_ops[opcode].name;
+    }
+    return "???";
+}
+
+void fuse_session_process_buf(struct fuse_session *se,
+                              const struct fuse_buf *buf)
+{
+    struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; /* copy of *buf */
+    fuse_session_process_buf_int(se, &bufv, NULL); /* ch is passed as NULL */
+}
+
+/*
+ * Decode one request from bufv, allocate a fuse_req for it and dispatch it.
+ *
+ * Restriction:
+ *   bufv is normally a single entry buffer, except for a write where (if
+ *   it's in memory) the bufv may have multiple entries: the first contains
+ *   all headers and subsequent entries contain data.  bufv shall not use
+ *   any offsets etc to make the data anything other than contiguous from 0.
+ */
+void fuse_session_process_buf_int(struct fuse_session *se,
+                                  struct fuse_bufvec *bufv,
+                                  struct fuse_chan *ch)
+{
+    const struct fuse_buf *buf = bufv->buf;
+    struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf);
+    struct fuse_in_header *in;
+    struct fuse_req *req;
+    int err;
+
+    /* The first buffer must be a memory buffer */
+    assert(!(buf->flags & FUSE_BUF_IS_FD));
+
+    in = fuse_mbuf_iter_advance(&iter, sizeof(*in));
+    assert(in); /* caller guarantees the input buffer is large enough */
+
+    fuse_log(
+        FUSE_LOG_DEBUG,
+        "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n",
+        (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode),
+        in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid);
+    /* If no req can be allocated, -ENOMEM is sent without one. */
+    req = fuse_ll_alloc_req(se);
+    if (req == NULL) {
+        struct fuse_out_header out = {
+            .unique = in->unique,
+            .error = -ENOMEM,
+        };
+        struct iovec iov = {
+            .iov_base = &out,
+            .iov_len = sizeof(struct fuse_out_header),
+        };
+
+        fuse_send_msg(se, ch, &iov, 1);
+        return;
+    }
+
+    req->unique = in->unique;
+    req->ctx.uid = in->uid;
+    req->ctx.gid = in->gid;
+    req->ctx.pid = in->pid;
+    req->ch = ch;
+
+    /*
+     * INIT and DESTROY requests are serialized, all other request types
+     * run in parallel.  This prevents races between FUSE_INIT and ordinary
+     * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and
+     * FUSE_DESTROY and FUSE_DESTROY.
+     */
+    if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT ||
+        in->opcode == FUSE_DESTROY) {
+        pthread_rwlock_wrlock(&se->init_rwlock);
+    } else {
+        pthread_rwlock_rdlock(&se->init_rwlock);
+    }
+    /* err holds the errno for reply_err and is updated stage by stage. */
+    err = EIO;
+    if (!se->got_init) {
+        enum fuse_opcode expected;
+
+        expected = se->cuse_data ? CUSE_INIT : FUSE_INIT;
+        if (in->opcode != expected) {
+            goto reply_err;
+        }
+    } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) {
+        if (fuse_lowlevel_is_virtio(se)) {
+            /*
+             * TODO: This is after a hard reboot typically, we need to do
+             * a destroy, but we can't reply to this request yet so
+             * we can't use do_destroy
+             */
+            fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__);
+            se->got_destroy = 1;
+            se->got_init = 0;
+            if (se->op.destroy) {
+                se->op.destroy(se->userdata);
+            }
+        } else {
+            goto reply_err;
+        }
+    }
+
+    err = EACCES;
+    /* Implement -o allow_root */
+    if (se->deny_others && in->uid != se->owner && in->uid != 0 &&
+        in->opcode != FUSE_INIT && in->opcode != FUSE_READ &&
+        in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC &&
+        in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR &&
+        in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR &&
+        in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) {
+        goto reply_err;
+    }
+    /* Unknown opcodes and table slots without a handler get ENOSYS. */
+    err = ENOSYS;
+    if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) {
+        goto reply_err;
+    }
+    if (in->opcode != FUSE_INTERRUPT) {
+        struct fuse_req *intr;
+        pthread_mutex_lock(&se->lock);
+        intr = check_interrupt(se, req);
+        list_add_req(req, &se->list);
+        pthread_mutex_unlock(&se->lock);
+        if (intr) {
+            fuse_reply_err(intr, EAGAIN);
+        }
+    }
+    /* FUSE_WRITE takes the write_buf path when that handler exists. */
+    if (in->opcode == FUSE_WRITE && se->op.write_buf) {
+        do_write_buf(req, in->nodeid, &iter, bufv);
+    } else {
+        fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter);
+    }
+
+    pthread_rwlock_unlock(&se->init_rwlock);
+    return;
+
+reply_err:
+    fuse_reply_err(req, err);
+    pthread_rwlock_unlock(&se->init_rwlock);
+}
+
+#define LL_OPTION(n, o, v)                     \
+    {                                          \
+        n, offsetof(struct fuse_session, o), v \
+    }
+
+static const struct fuse_opt fuse_ll_opts[] = { /* see fuse_session_new() */
+    LL_OPTION("debug", debug, 1),
+    LL_OPTION("-d", debug, 1),
+    LL_OPTION("--debug", debug, 1),
+    LL_OPTION("allow_root", deny_others, 1),
+    LL_OPTION("--socket-path=%s", vu_socket_path, 0),
+    LL_OPTION("--fd=%d", vu_listen_fd, 0),
+    LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0),
+    FUSE_OPT_END /* presumably the table terminator - confirm in fuse_opt.h */
+};
+
+void fuse_lowlevel_version(void)
+{
+    printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION,
+           FUSE_KERNEL_MINOR_VERSION); /* same values do_init() replies with */
+}
+
+void fuse_lowlevel_help(void)
+{
+    /*
+     * Not every option is shown, only those an end-user may care about.
+     */
+    fputs("    -o allow_root              allow access by root\n"
+          "    --socket-path=PATH         path for the vhost-user socket\n"
+          "    --fd=FDNUM                 fd number of vhost-user socket\n",
+          stdout);
+    printf(
+        "    --thread-pool-size=NUM     thread pool size limit (default %d)\n",
+        THREAD_POOL_SIZE);
+}
+
+/*
+ * Tear down a session and free it.
+ *
+ * The filesystem's destroy() callback is invoked first, but only when
+ * got_init is set, got_destroy is clear and a callback was registered.
+ * Afterwards the locks are destroyed, the session fd (if any) is closed,
+ * the virtio side is shut down when present, and all memory owned by the
+ * session is released.  se must not be used after this call.
+ */
+void fuse_session_destroy(struct fuse_session *se)
+{
+    const int want_destroy_cb = se->got_init && !se->got_destroy;
+
+    if (want_destroy_cb && se->op.destroy) {
+        se->op.destroy(se->userdata);
+    }
+
+    pthread_rwlock_destroy(&se->init_rwlock);
+    pthread_mutex_destroy(&se->lock);
+    free(se->cuse_data);
+
+    /* -1 marks "no descriptor" (see fuse_session_new) */
+    if (se->fd != -1) {
+        close(se->fd);
+    }
+    if (fuse_lowlevel_is_virtio(se)) {
+        virtio_session_close(se);
+    }
+
+    free(se->vu_socket_path);
+    se->vu_socket_path = NULL;
+    free(se);
+}
+
+
+/*
+ * Create a session from command-line arguments.
+ *
+ * @param args     argument vector; the options in fuse_ll_opts are consumed
+ *                 by fuse_opt_parse().  After parsing, only argv[0] may be
+ *                 left over, otherwise the call fails with "unknown
+ *                 option(s)".
+ * @param op       operations table; min(op_size, sizeof(*op)) bytes of it
+ *                 are copied into the session
+ * @param op_size  size in bytes of the caller's operations table
+ * @param userdata opaque pointer stored in the session
+ *
+ * Exactly one of --socket-path and --fd has to be given.
+ *
+ * @return the new session, or NULL on failure.  When option validation
+ *         fails, args is released with fuse_opt_free_args() before
+ *         returning.
+ */
+struct fuse_session *fuse_session_new(struct fuse_args *args,
+                                      const struct fuse_lowlevel_ops *op,
+                                      size_t op_size, void *userdata)
+{
+    struct fuse_session *se;
+
+    /* Never copy more than this build's idea of the ops table */
+    if (sizeof(struct fuse_lowlevel_ops) < op_size) {
+        fuse_log(
+            FUSE_LOG_ERR,
+            "fuse: warning: library too old, some operations may not work\n");
+        op_size = sizeof(struct fuse_lowlevel_ops);
+    }
+
+    if (args->argc == 0) {
+        fuse_log(FUSE_LOG_ERR,
+                 "fuse: empty argv passed to fuse_session_new().\n");
+        return NULL;
+    }
+
+    se = calloc(1, sizeof(*se));
+    if (se == NULL) {
+        fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n");
+        goto out1;
+    }
+    /* Defaults that differ from the all-zero state left by calloc() */
+    se->fd = -1;
+    se->vu_listen_fd = -1;
+    se->thread_pool_size = THREAD_POOL_SIZE;
+    se->conn.max_write = UINT_MAX;
+    se->conn.max_readahead = UINT_MAX;
+
+    /* Parse options */
+    if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) {
+        goto out2;
+    }
+    if (args->argc == 1 && args->argv[0][0] == '-') {
+        fuse_log(FUSE_LOG_ERR,
+                 "fuse: warning: argv[0] looks like an option, but "
+                 "will be ignored\n");
+    } else if (args->argc != 1) {
+        int i;
+        fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `");
+        for (i = 1; i < args->argc - 1; i++) {
+            fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]);
+        }
+        /* i now indexes the last argument; close the quote after it */
+        fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]);
+        goto out4;
+    }
+
+    if (!se->vu_socket_path && se->vu_listen_fd < 0) {
+        fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd option\n");
+        goto out4;
+    }
+    if (se->vu_socket_path && se->vu_listen_fd >= 0) {
+        fuse_log(FUSE_LOG_ERR,
+                 "fuse: --socket-path and --fd cannot be given together\n");
+        goto out4;
+    }
+
+    se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE;
+
+    list_init_req(&se->list);
+    list_init_req(&se->interrupts);
+    fuse_mutex_init(&se->lock);
+    pthread_rwlock_init(&se->init_rwlock, NULL);
+
+    memcpy(&se->op, op, op_size);
+    se->owner = getuid();
+    se->userdata = userdata;
+
+    return se;
+
+out4:
+    fuse_opt_free_args(args);
+out2:
+    /*
+     * "--socket-path=%s" may already have stored a heap-allocated string
+     * in the session; release it so the error paths do not leak it.
+     * free(NULL) is a no-op when the option was never seen.
+     */
+    free(se->vu_socket_path);
+    free(se);
+out1:
+    return NULL;
+}
+
+/*
+ * "Mount" the session.  This is a thin wrapper: the work is delegated to
+ * virtio_session_mount() and its return value is passed through unchanged.
+ */
+int fuse_session_mount(struct fuse_session *se)
+{
+    return virtio_session_mount(se);
+}
+
+/*
+ * Return the session's fd member.  fuse_session_new() initialises it to -1,
+ * so -1 means no descriptor has been stored.
+ */
+int fuse_session_fd(struct fuse_session *se)
+{
+    return se->fd;
+}
+
+/*
+ * Counterpart of fuse_session_mount().  Currently a no-op: nothing is
+ * undone here.
+ */
+void fuse_session_unmount(struct fuse_session *se)
+{
+    (void)se;
+}
+
+/*
+ * Tell whether the session has a virtio device attached.
+ *
+ * @return 1 if se->virtio_dev is set, 0 otherwise
+ */
+int fuse_lowlevel_is_virtio(struct fuse_session *se)
+{
+    return se->virtio_dev ? 1 : 0;
+}
+
+/*
+ * __linux__ is predefined by the compiler in every language dialect; the
+ * bare "linux" macro only exists in GNU dialects.
+ */
+#ifdef __linux__
+/*
+ * Obtain the supplementary group IDs of the process that issued a request.
+ *
+ * The IDs are taken from the "Groups:" line of
+ * /proc/<pid>/task/<pid>/status, where <pid> is req->ctx.pid.
+ *
+ * @param req  request handle
+ * @param size number of elements available in list
+ * @param list receives at most size group IDs
+ *
+ * @return the total number of group IDs found, which may be larger than
+ *         size (only the first size IDs are stored in that case), or a
+ *         negative errno value: -ENOMEM if the read buffer cannot be
+ *         allocated, -EIO if the status file cannot be opened or read or
+ *         has no "Groups:" line.
+ */
+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[])
+{
+    char *buf;
+    size_t bufsize = 1024;
+    char path[128];
+    int ret;
+    int fd;
+    unsigned long pid = req->ctx.pid;
+    char *s;
+
+    snprintf(path, sizeof(path), "/proc/%lu/task/%lu/status", pid, pid);
+
+retry:
+    buf = malloc(bufsize);
+    if (buf == NULL) {
+        return -ENOMEM;
+    }
+
+    ret = -EIO;
+    fd = open(path, O_RDONLY);
+    if (fd == -1) {
+        goto out_free;
+    }
+
+    ret = read(fd, buf, bufsize);
+    close(fd);
+    if (ret < 0) {
+        ret = -EIO;
+        goto out_free;
+    }
+
+    /* A completely filled buffer may be truncated: retry with a larger one */
+    if ((size_t)ret == bufsize) {
+        free(buf);
+        bufsize *= 4;
+        goto retry;
+    }
+
+    /*
+     * read() does not NUL-terminate, but strstr()/strtoul() below need a
+     * C string.  ret < bufsize is guaranteed here, so buf[ret] is in bounds.
+     */
+    buf[ret] = '\0';
+
+    ret = -EIO;
+    s = strstr(buf, "\nGroups:");
+    if (s == NULL) {
+        goto out_free;
+    }
+
+    /* Skip over "\nGroups:" (8 characters) */
+    s += 8;
+    ret = 0;
+    while (1) {
+        char *end;
+        unsigned long val = strtoul(s, &end, 0);
+        /* No digits consumed: the list of numbers has ended */
+        if (end == s) {
+            break;
+        }
+
+        s = end;
+        /* Keep counting past size so the caller learns the full count */
+        if (ret < size) {
+            list[ret] = val;
+        }
+        ret++;
+    }
+
+out_free:
+    free(buf);
+    return ret;
+}
+#else /* __linux__ */
+/*
+ * Not implemented on platforms other than Linux: always returns -ENOSYS.
+ */
+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[])
+{
+    (void)req;
+    (void)size;
+    (void)list;
+    return -ENOSYS;
+}
+#endif
+
+/*
+ * Flag the session as exited.  The flag can be read back with
+ * fuse_session_exited() and cleared with fuse_session_reset().
+ */
+void fuse_session_exit(struct fuse_session *se)
+{
+    se->exited = 1;
+}
+
+/*
+ * Clear the session's error code and its exited flag, undoing a previous
+ * fuse_session_exit().
+ */
+void fuse_session_reset(struct fuse_session *se)
+{
+    se->error = 0;
+    se->exited = 0;
+}
+
+/*
+ * Return the session's exited flag: the value stored by
+ * fuse_session_exit() (1) or fuse_session_reset() (0).
+ */
+int fuse_session_exited(struct fuse_session *se)
+{
+    return se->exited;
+}
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
new file mode 100644
index 0000000000..138041e5f1
--- /dev/null
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -0,0 +1,1991 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+#ifndef FUSE_LOWLEVEL_H_
+#define FUSE_LOWLEVEL_H_
+
+/**
+ * @file
+ *
+ * Low level API
+ *
+ * IMPORTANT: you should define FUSE_USE_VERSION before including this
+ * header.  To use the newest API define it to 31 (recommended for any
+ * new application).
+ */
+
+#ifndef FUSE_USE_VERSION
+#error FUSE_USE_VERSION not defined
+#endif
+
+#include "fuse_common.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <utime.h>
+
+/*
+ * Miscellaneous definitions
+ */
+
+/** The node ID of the root inode */
+#define FUSE_ROOT_ID 1
+
+/** Inode number type */
+typedef uint64_t fuse_ino_t;
+
+/** Request pointer type */
+typedef struct fuse_req *fuse_req_t;
+
+/**
+ * Session
+ *
+ * This provides hooks for processing requests, and exiting
+ */
+struct fuse_session;
+
+/**
+ * Directory entry parameters supplied to fuse_reply_entry()
+ *
+ * Describes one directory entry: the inode it refers to, that inode's
+ * attributes, and how long (in seconds) the attributes and the name may
+ * be considered valid.
+ */
+struct fuse_entry_param {
+    /**
+     * Unique inode number
+     *
+     * In lookup, zero means negative entry (from version 2.5).
+     * Returning ENOENT also means negative entry, but by setting zero
+     * ino the kernel may cache negative entries for entry_timeout
+     * seconds.
+     */
+    fuse_ino_t ino;
+
+    /**
+     * Generation number for this entry.
+     *
+     * If the file system will be exported over NFS, the
+     * ino/generation pairs need to be unique over the file
+     * system's lifetime (rather than just the mount time). So if
+     * the file system reuses an inode after it has been deleted,
+     * it must assign a new, previously unused generation number
+     * to the inode at the same time.
+     */
+    uint64_t generation;
+
+    /**
+     * Inode attributes.
+     *
+     * Even if attr_timeout == 0, attr must be correct. For example,
+     * for open(), FUSE uses attr.st_size from lookup() to determine
+     * how many bytes to request. If this value is not correct,
+     * incorrect data will be returned.
+     */
+    struct stat attr;
+
+    /**
+     * Validity timeout (in seconds) for inode attributes. If
+     * attributes only change as a result of requests that come
+     * through the kernel, this should be set to a very large
+     * value.
+     */
+    double attr_timeout;
+
+    /**
+     * Validity timeout (in seconds) for the name. If directory
+     * entries are changed/deleted only as a result of requests
+     * that come through the kernel, this should be set to a very
+     * large value.
+     */
+    double entry_timeout;
+};
+
+/**
+ * Additional context associated with requests.
+ *
+ * Identifies the caller on whose behalf a request was issued.
+ *
+ * Note that the reported client uid, gid and pid may be zero in some
+ * situations. For example, if the FUSE file system is running in a
+ * PID or user namespace but then accessed from outside the namespace,
+ * there is no valid uid/pid/gid that could be reported.
+ */
+struct fuse_ctx {
+    /** User ID of the calling process */
+    uid_t uid;
+
+    /** Group ID of the calling process */
+    gid_t gid;
+
+    /**
+     * Thread ID of the calling process
+     *
+     * NOTE(review): fuse_req_getgroups() appears to use this value to
+     * build a /proc/<pid>/task/<pid>/status path - confirm.
+     */
+    pid_t pid;
+
+    /** Umask of the calling process */
+    mode_t umask;
+};
+
+/**
+ * One inode / lookup-count pair.
+ *
+ * The fields mirror the ino and nlookup parameters of the forget
+ * operation; presumably used where several forgets are delivered in one
+ * request - TODO confirm against the users of this struct.
+ */
+struct fuse_forget_data {
+    /** The inode number */
+    fuse_ino_t ino;
+    /** The number of lookups to forget */
+    uint64_t nlookup;
+};
+
+/*
+ * 'to_set' flags in setattr
+ *
+ * Bit mask values telling the setattr operation which attributes are
+ * to be changed.  Bits 6 and 9 have no flag defined in this list.
+ */
+#define FUSE_SET_ATTR_MODE (1 << 0)
+#define FUSE_SET_ATTR_UID (1 << 1)
+#define FUSE_SET_ATTR_GID (1 << 2)
+#define FUSE_SET_ATTR_SIZE (1 << 3)
+#define FUSE_SET_ATTR_ATIME (1 << 4)
+#define FUSE_SET_ATTR_MTIME (1 << 5)
+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7)
+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8)
+#define FUSE_SET_ATTR_CTIME (1 << 10)
+
+/*
+ * Request methods and replies
+ */
+
+/**
+ * Low level filesystem operations
+ *
+ * Most of the methods (with the exception of init and destroy)
+ * receive a request handle (fuse_req_t) as their first argument.
+ * This handle must be passed to one of the specified reply functions.
+ *
+ * This may be done inside the method invocation, or after the call
+ * has returned.  The request handle is valid until one of the reply
+ * functions is called.
+ *
+ * Other pointer arguments (name, fuse_file_info, etc) are not valid
+ * after the call has returned, so if they are needed later, their
+ * contents have to be copied.
+ *
+ * In general, all methods are expected to perform any necessary
+ * permission checking. However, a filesystem may delegate this task
+ * to the kernel by passing the `default_permissions` mount option to
+ * `fuse_session_new()`. In this case, methods will only be called if
+ * the kernel's permission check has succeeded.
+ *
+ * The filesystem sometimes needs to handle a return value of -ENOENT
+ * from the reply function, which means that the request was
+ * interrupted and the reply discarded.  For example, if
+ * fuse_reply_open() returns -ENOENT, the release method for
+ * this file will not be called.
+ */
+struct fuse_lowlevel_ops {
+    /**
+     * Initialize filesystem
+     *
+     * This function is called when libfuse establishes
+     * communication with the FUSE kernel module. The file system
+     * should use this module to inspect and/or modify the
+     * connection parameters provided in the `conn` structure.
+     *
+     * Note that some parameters may be overwritten by options
+     * passed to fuse_session_new() which take precedence over the
+     * values set in this handler.
+     *
+     * There's no reply to this function
+     *
+     * @param userdata the user data passed to fuse_session_new()
+     */
+    void (*init)(void *userdata, struct fuse_conn_info *conn);
+
+    /**
+     * Clean up filesystem.
+     *
+     * Called on filesystem exit. When this method is called, the
+     * connection to the kernel may be gone already, so that eg. calls
+     * to fuse_lowlevel_notify_* will fail.
+     *
+     * There's no reply to this function
+     *
+     * @param userdata the user data passed to fuse_session_new()
+     */
+    void (*destroy)(void *userdata);
+
+    /**
+     * Look up a directory entry by name and get its attributes.
+     *
+     * Valid replies:
+     *   fuse_reply_entry
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the parent directory
+     * @param name the name to look up
+     */
+    void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name);
+
+    /**
+     * Forget about an inode
+     *
+     * This function is called when the kernel removes an inode
+     * from its internal caches.
+     *
+     * The inode's lookup count increases by one for every call to
+     * fuse_reply_entry and fuse_reply_create. The nlookup parameter
+     * indicates by how much the lookup count should be decreased.
+     *
+     * Inodes with a non-zero lookup count may receive request from
+     * the kernel even after calls to unlink, rmdir or (when
+     * overwriting an existing file) rename. Filesystems must handle
+     * such requests properly and it is recommended to defer removal
+     * of the inode until the lookup count reaches zero. Calls to
+     * unlink, rmdir or rename will be followed closely by forget
+     * unless the file or directory is open, in which case the
+     * kernel issues forget only after the release or releasedir
+     * calls.
+     *
+     * Note that if a file system will be exported over NFS the
+     * inodes lifetime must extend even beyond forget. See the
+     * generation field in struct fuse_entry_param above.
+     *
+     * On unmount the lookup count for all inodes implicitly drops
+     * to zero. It is not guaranteed that the file system will
+     * receive corresponding forget messages for the affected
+     * inodes.
+     *
+     * Valid replies:
+     *   fuse_reply_none
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param nlookup the number of lookups to forget
+     */
+    void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup);
+
+    /**
+     * Get file attributes.
+     *
+     * If writeback caching is enabled, the kernel may have a
+     * better idea of a file's length than the FUSE file system
+     * (eg if there has been a write that extended the file size,
+     * but that has not yet been passed to the filesystem).
+     *
+     * In this case, the st_size value provided by the file system
+     * will be ignored.
+     *
+     * Valid replies:
+     *   fuse_reply_attr
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi for future use, currently always NULL
+     */
+    void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+    /**
+     * Set file attributes
+     *
+     * In the 'attr' argument only members indicated by the 'to_set'
+     * bitmask contain valid values.  Other members contain undefined
+     * values.
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits if the file
+     * size or owner is being changed.
+     *
+     * If the setattr was invoked from the ftruncate() system call
+     * under Linux kernel versions 2.6.15 or later, the fi->fh will
+     * contain the value set by the open method or will be undefined
+     * if the open method didn't set any value.  Otherwise (not
+     * ftruncate call, or kernel version earlier than 2.6.15) the fi
+     * parameter will be NULL.
+     *
+     * Valid replies:
+     *   fuse_reply_attr
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param attr the attributes
+     * @param to_set bit mask of attributes which should be set
+     * @param fi file information, or NULL
+     */
+    void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+                    int to_set, struct fuse_file_info *fi);
+
+    /**
+     * Read symbolic link
+     *
+     * Valid replies:
+     *   fuse_reply_readlink
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     */
+    void (*readlink)(fuse_req_t req, fuse_ino_t ino);
+
+    /**
+     * Create file node
+     *
+     * Create a regular file, character device, block device, fifo or
+     * socket node.
+     *
+     * Valid replies:
+     *   fuse_reply_entry
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the parent directory
+     * @param name to create
+     * @param mode file type and mode with which to create the new file
+     * @param rdev the device number (only valid if created file is a device)
+     */
+    void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name,
+                  mode_t mode, dev_t rdev);
+
+    /**
+     * Create a directory
+     *
+     * Valid replies:
+     *   fuse_reply_entry
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the parent directory
+     * @param name to create
+     * @param mode with which to create the new file
+     */
+    void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name,
+                  mode_t mode);
+
+    /**
+     * Remove a file
+     *
+     * If the file's inode's lookup count is non-zero, the file
+     * system is expected to postpone any removal of the inode
+     * until the lookup count reaches zero (see description of the
+     * forget function).
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the parent directory
+     * @param name to remove
+     */
+    void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name);
+
+    /**
+     * Remove a directory
+     *
+     * If the directory's inode's lookup count is non-zero, the
+     * file system is expected to postpone any removal of the
+     * inode until the lookup count reaches zero (see description
+     * of the forget function).
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the parent directory
+     * @param name to remove
+     */
+    void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name);
+
+    /**
+     * Create a symbolic link
+     *
+     * Valid replies:
+     *   fuse_reply_entry
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param link the contents of the symbolic link
+     * @param parent inode number of the parent directory
+     * @param name to create
+     */
+    void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent,
+                    const char *name);
+
+    /**
+     * Rename a file
+     *
+     * If the target exists it should be atomically replaced. If
+     * the target's inode's lookup count is non-zero, the file
+     * system is expected to postpone any removal of the inode
+     * until the lookup count reaches zero (see description of the
+     * forget function).
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EINVAL, i.e. all
+     * future rename requests will fail with EINVAL without being
+     * sent to the filesystem process.
+     *
+     * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If
+     * RENAME_NOREPLACE is specified, the filesystem must not
+     * overwrite *newname* if it exists and return an error
+     * instead. If `RENAME_EXCHANGE` is specified, the filesystem
+     * must atomically exchange the two files, i.e. both must
+     * exist and neither may be deleted.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the old parent directory
+     * @param name old name
+     * @param newparent inode number of the new parent directory
+     * @param newname new name
+     */
+    void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name,
+                   fuse_ino_t newparent, const char *newname,
+                   unsigned int flags);
+
+    /**
+     * Create a hard link
+     *
+     * Valid replies:
+     *   fuse_reply_entry
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the old inode number
+     * @param newparent inode number of the new parent directory
+     * @param newname new name to create
+     */
+    void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent,
+                 const char *newname);
+
+    /**
+     * Open a file
+     *
+     * Open flags are available in fi->flags. The following rules
+     * apply.
+     *
+     *  - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be
+     *    filtered out / handled by the kernel.
+     *
+     *  - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used
+     *    by the filesystem to check if the operation is
+     *    permitted.  If the ``-o default_permissions`` mount
+     *    option is given, this check is already done by the
+     *    kernel before calling open() and may thus be omitted by
+     *    the filesystem.
+     *
+     *  - When writeback caching is enabled, the kernel may send
+     *    read requests even for files opened with O_WRONLY. The
+     *    filesystem should be prepared to handle this.
+     *
+     *  - When writeback caching is disabled, the filesystem is
+     *    expected to properly handle the O_APPEND flag and ensure
+     *    that each write is appending to the end of the file.
+     *
+     *  - When writeback caching is enabled, the kernel will
+     *    handle O_APPEND. However, unless all changes to the file
+     *    come through the kernel this will not work reliably. The
+     *    filesystem should thus either ignore the O_APPEND flag
+     *    (and let the kernel handle it), or return an error
+     *    (indicating that reliably O_APPEND is not available).
+     *
+     * Filesystem may store an arbitrary file handle (pointer,
+     * index, etc) in fi->fh, and use this in all other file
+     * operations (read, write, flush, release, fsync).
+     *
+     * Filesystem may also implement stateless file I/O and not store
+     * anything in fi->fh.
+     *
+     * There are also some flags (direct_io, keep_cache) which the
+     * filesystem may set in fi, to change the way the file is opened.
+     * See fuse_file_info structure in <fuse_common.h> for more details.
+     *
+     * If this request is answered with an error code of ENOSYS
+     * and FUSE_CAP_NO_OPEN_SUPPORT is set in
+     * `fuse_conn_info.capable`, this is treated as success and
+     * future calls to open and release will also succeed without being
+     * sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_open
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     */
+    void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+    /**
+     * Read data
+     *
+     * Read should send exactly the number of bytes requested except
+     * on EOF or error, otherwise the rest of the data will be
+     * substituted with zeroes.  An exception to this is when the file
+     * has been opened in 'direct_io' mode, in which case the return
+     * value of the read system call will reflect the return value of
+     * this operation.
+     *
+     * fi->fh will contain the value set by the open method, or will
+     * be undefined if the open method didn't set any value.
+     *
+     * Valid replies:
+     *   fuse_reply_buf
+     *   fuse_reply_iov
+     *   fuse_reply_data
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param size number of bytes to read
+     * @param off offset to read from
+     * @param fi file information
+     */
+    void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
+                 struct fuse_file_info *fi);
+
+    /**
+     * Write data
+     *
+     * Write should return exactly the number of bytes requested
+     * except on error.  An exception to this is when the file has
+     * been opened in 'direct_io' mode, in which case the return value
+     * of the write system call will reflect the return value of this
+     * operation.
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits.
+     *
+     * fi->fh will contain the value set by the open method, or will
+     * be undefined if the open method didn't set any value.
+     *
+     * Valid replies:
+     *   fuse_reply_write
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param buf data to write
+     * @param size number of bytes to write
+     * @param off offset to write to
+     * @param fi file information
+     */
+    void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size,
+                  off_t off, struct fuse_file_info *fi);
+
+    /**
+     * Flush method
+     *
+     * This is called on each close() of the opened file.
+     *
+     * Since file descriptors can be duplicated (dup, dup2, fork), for
+     * one open call there may be many flush calls.
+     *
+     * Filesystems shouldn't assume that flush will always be called
+     * after some writes, or that it will be called at all.
+     *
+     * fi->fh will contain the value set by the open method, or will
+     * be undefined if the open method didn't set any value.
+     *
+     * NOTE: the name of the method is misleading, since (unlike
+     * fsync) the filesystem is not forced to flush pending writes.
+     * One reason to flush data is if the filesystem wants to return
+     * write errors during close.  However, such use is non-portable
+     * because POSIX does not require [close] to wait for delayed I/O to
+     * complete.
+     *
+     * If the filesystem supports file locking operations (setlk,
+     * getlk) it should remove all locks belonging to 'fi->owner'.
+     *
+     * If this request is answered with an error code of ENOSYS,
+     * this is treated as success and future calls to flush() will
+     * succeed automatically without being sent to the filesystem
+     * process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     *
+     * [close]:
+     * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html
+     */
+    void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+    /**
+     * Release an open file
+     *
+     * Release is called when there are no more references to an open
+     * file: all file descriptors are closed and all memory mappings
+     * are unmapped.
+     *
+     * For every open call there will be exactly one release call (unless
+     * the filesystem is force-unmounted).
+     *
+     * The filesystem may reply with an error, but error values are
+     * not returned to close() or munmap() which triggered the
+     * release.
+     *
+     * fi->fh will contain the value set by the open method, or will
+     * be undefined if the open method didn't set any value.
+     * fi->flags will contain the same flags as for open.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     */
+    void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+    /**
+     * Synchronize file contents
+     *
+     * If the datasync parameter is non-zero, then only the user data
+     * should be flushed, not the meta data.
+     *
+     * If this request is answered with an error code of ENOSYS,
+     * this is treated as success and future calls to fsync() will
+     * succeed automatically without being sent to the filesystem
+     * process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param datasync flag indicating if only data should be flushed
+     * @param fi file information
+     */
+    void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync,
+                  struct fuse_file_info *fi);
+
+    /**
+     * Open a directory
+     *
+     * Filesystem may store an arbitrary file handle (pointer, index,
+     * etc) in fi->fh, and use this in all other directory
+     * stream operations (readdir, releasedir, fsyncdir).
+     *
+     * If this request is answered with an error code of ENOSYS and
+     * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`,
+     * this is treated as success and future calls to opendir and
+     * releasedir will also succeed without being sent to the filesystem
+     * process. In addition, the kernel will cache readdir results
+     * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR.
+     *
+     * Valid replies:
+     *   fuse_reply_open
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     */
+    void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi);
+
+    /**
+     * Read directory
+     *
+     * Send a buffer filled using fuse_add_direntry(), with size not
+     * exceeding the requested size.  Send an empty buffer on end of
+     * stream.
+     *
+     * fi->fh will contain the value set by the opendir method, or
+     * will be undefined if the opendir method didn't set any value.
+     *
+     * Returning a directory entry from readdir() does not affect
+     * its lookup count.
+     *
+     * If off_t is non-zero, then it will correspond to one of the off_t
+     * values that was previously returned by readdir() for the same
+     * directory handle. In this case, readdir() should skip over entries
+     * coming before the position defined by the off_t value. If entries
+     * are added or removed while the directory handle is open, the filesystem
+     * may still include the entries that have been removed, and may not
+     * report the entries that have been created. However, addition or
+     * removal of entries must never cause readdir() to skip over unrelated
+     * entries or to report them more than once. This means
+     * that off_t can not be a simple index that enumerates the entries
+     * that have been returned but must contain sufficient information to
+     * uniquely determine the next directory entry to return even when the
+     * set of entries is changing.
+     *
+     * The function does not have to report the '.' and '..'
+     * entries, but is allowed to do so. Note that, if readdir does
+     * not return '.' or '..', they will not be implicitly returned,
+     * and this behavior is observable by the caller.
+     *
+     * Valid replies:
+     *   fuse_reply_buf
+     *   fuse_reply_data
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param size maximum number of bytes to send
+     * @param off offset to continue reading the directory stream
+     * @param fi file information
+     */
+    void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
+                    struct fuse_file_info *fi);
+
+    /**
+     * Release an open directory
+     *
+     * For every opendir call there will be exactly one releasedir
+     * call (unless the filesystem is force-unmounted).
+     *
+     * fi->fh will contain the value set by the opendir method, or
+     * will be undefined if the opendir method didn't set any value.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     */
+    void (*releasedir)(fuse_req_t req, fuse_ino_t ino,
+                       struct fuse_file_info *fi);
+
+    /**
+     * Synchronize directory contents
+     *
+     * If the datasync parameter is non-zero, then only the directory
+     * contents should be flushed, not the meta data.
+     *
+     * fi->fh will contain the value set by the opendir method, or
+     * will be undefined if the opendir method didn't set any value.
+     *
+     * If this request is answered with an error code of ENOSYS,
+     * this is treated as success and future calls to fsyncdir() will
+     * succeed automatically without being sent to the filesystem
+     * process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param datasync flag indicating if only data should be flushed
+     * @param fi file information
+     */
+    void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync,
+                     struct fuse_file_info *fi);
+
+    /**
+     * Get file system statistics
+     *
+     * Valid replies:
+     *   fuse_reply_statfs
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number, zero means "undefined"
+     */
+    void (*statfs)(fuse_req_t req, fuse_ino_t ino);
+
+    /**
+     * Set an extended attribute
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
+     * future setxattr() requests will fail with EOPNOTSUPP without being
+     * sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     */
+    void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name,
+                     const char *value, size_t size, int flags);
+
+    /**
+     * Get an extended attribute
+     *
+     * If size is zero, the size of the value should be sent with
+     * fuse_reply_xattr.
+     *
+     * If the size is non-zero, and the value fits in the buffer, the
+     * value should be sent with fuse_reply_buf.
+     *
+     * If the size is too small for the value, the ERANGE error should
+     * be sent.
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
+     * future getxattr() requests will fail with EOPNOTSUPP without being
+     * sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_buf
+     *   fuse_reply_data
+     *   fuse_reply_xattr
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param name of the extended attribute
+     * @param size maximum size of the value to send
+     */
+    void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name,
+                     size_t size);
+
+    /**
+     * List extended attribute names
+     *
+     * If size is zero, the total size of the attribute list should be
+     * sent with fuse_reply_xattr.
+     *
+     * If the size is non-zero, and the null character separated
+     * attribute list fits in the buffer, the list should be sent with
+     * fuse_reply_buf.
+     *
+     * If the size is too small for the list, the ERANGE error should
+     * be sent.
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
+     * future listxattr() requests will fail with EOPNOTSUPP without being
+     * sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_buf
+     *   fuse_reply_data
+     *   fuse_reply_xattr
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param size maximum size of the list to send
+     */
+    void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size);
+
+    /**
+     * Remove an extended attribute
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
+     * future removexattr() requests will fail with EOPNOTSUPP without being
+     * sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param name of the extended attribute
+     */
+    void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name);
+
+    /**
+     * Check file access permissions
+     *
+     * This will be called for the access() and chdir() system
+     * calls.  If the 'default_permissions' mount option is given,
+     * this method is not called.
+     *
+     * This method is not called under Linux kernel versions 2.4.x
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent success, i.e. this and all future access()
+     * requests will succeed without being sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param mask requested access mode
+     */
+    void (*access)(fuse_req_t req, fuse_ino_t ino, int mask);
+
+    /**
+     * Create and open a file
+     *
+     * If the file does not exist, first create it with the specified
+     * mode, and then open it.
+     *
+     * See the description of the open handler for more
+     * information.
+     *
+     * If this method is not implemented or under Linux kernel
+     * versions earlier than 2.6.15, the mknod() and open() methods
+     * will be called instead.
+     *
+     * If this request is answered with an error code of ENOSYS, the handler
+     * is treated as not implemented (i.e., for this and future requests the
+     * mknod() and open() handlers will be called instead).
+     *
+     * Valid replies:
+     *   fuse_reply_create
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param parent inode number of the parent directory
+     * @param name to create
+     * @param mode file type and mode with which to create the new file
+     * @param fi file information
+     */
+    void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name,
+                   mode_t mode, struct fuse_file_info *fi);
+
+    /**
+     * Test for a POSIX file lock
+     *
+     * Valid replies:
+     *   fuse_reply_lock
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     * @param lock the region/type to test
+     */
+    void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                  struct flock *lock);
+
+    /**
+     * Acquire, modify or release a POSIX file lock
+     *
+     * For POSIX threads (NPTL) there's a 1-1 relation between pid and
+     * owner, but otherwise this is not always the case.  For checking
+     * lock ownership, 'fi->owner' must be used.  The l_pid field in
+     * 'struct flock' should only be used to fill in this field in
+     * getlk().
+     *
+     * Note: if the locking methods are not implemented, the kernel
+     * will still allow file locking to work locally.  Hence these are
+     * only interesting for network filesystems and similar.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     * @param lock the region/type to set
+     * @param sleep locking operation may sleep
+     */
+    void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                  struct flock *lock, int sleep);
+
+    /**
+     * Map block index within file to block index within device
+     *
+     * Note: This makes sense only for block device backed filesystems
+     * mounted with the 'blkdev' option
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure, i.e. all future bmap() requests will
+     * fail with the same error code without being sent to the filesystem
+     * process.
+     *
+     * Valid replies:
+     *   fuse_reply_bmap
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param blocksize unit of block index
+     * @param idx block index within file
+     */
+    void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize,
+                 uint64_t idx);
+
+    /**
+     * Ioctl
+     *
+     * Note: For unrestricted ioctls (not allowed for FUSE
+     * servers), data in and out areas can be discovered by giving
+     * iovs and setting FUSE_IOCTL_RETRY in *flags*.  For
+     * restricted ioctls, kernel prepares in/out data area
+     * according to the information encoded in cmd.
+     *
+     * Valid replies:
+     *   fuse_reply_ioctl_retry
+     *   fuse_reply_ioctl
+     *   fuse_reply_ioctl_iov
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param cmd ioctl command
+     * @param arg ioctl argument
+     * @param fi file information
+     * @param flags for FUSE_IOCTL_* flags
+     * @param in_buf data fetched from the caller
+     * @param in_bufsz number of fetched bytes
+     * @param out_bufsz maximum size of output data
+     *
+     * Note : the unsigned long request submitted by the application
+     * is truncated to 32 bits.
+     */
+    void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
+                  struct fuse_file_info *fi, unsigned flags, const void *in_buf,
+                  size_t in_bufsz, size_t out_bufsz);
+
+    /**
+     * Poll for IO readiness
+     *
+     * Note: If ph is non-NULL, the client should notify
+     * when IO readiness events occur by calling
+     * fuse_lowlevel_notify_poll() with the specified ph.
+     *
+     * Regardless of the number of times poll with a non-NULL ph
+     * is received, a single notification is enough to clear all.
+     * Notifying more times incurs overhead but doesn't harm
+     * correctness.
+     *
+     * The callee is responsible for destroying ph with
+     * fuse_pollhandle_destroy() when no longer in use.
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as success (with a kernel-defined default poll-mask) and
+     * future calls to poll() will succeed the same way without being sent
+     * to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_poll
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     * @param ph poll handle to be used for notification
+     */
+    void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                 struct fuse_pollhandle *ph);
+
+    /**
+     * Write data made available in a buffer
+     *
+     * This is a more generic version of the ->write() method.  If
+     * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the
+     * kernel supports splicing from the fuse device, then the
+     * data will be made available in pipe for supporting zero
+     * copy data transfer.
+     *
+     * bufv->count is guaranteed to be one (and thus bufv->idx is
+     * always zero). The write_buf handler must ensure that
+     * bufv->off is correctly updated (reflecting the number of
+     * bytes read from bufv->buf[0]).
+     *
+     * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is
+     * expected to reset the setuid and setgid bits.
+     *
+     * Valid replies:
+     *   fuse_reply_write
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param bufv buffer containing the data
+     * @param off offset to write to
+     * @param fi file information
+     */
+    void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv,
+                      off_t off, struct fuse_file_info *fi);
+
+    /**
+     * Forget about multiple inodes
+     *
+     * See description of the forget function for more
+     * information.
+     *
+     * Valid replies:
+     *   fuse_reply_none
+     *
+     * @param req request handle
+     */
+    void (*forget_multi)(fuse_req_t req, size_t count,
+                         struct fuse_forget_data *forgets);
+
+    /**
+     * Acquire, modify or release a BSD file lock
+     *
+     * Note: if the locking methods are not implemented, the kernel
+     * will still allow file locking to work locally.  Hence these are
+     * only interesting for network filesystems and similar.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param fi file information
+     * @param op the locking operation, see flock(2)
+     */
+    void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                  int op);
+
+    /**
+     * Allocate requested space. If this function returns success then
+     * subsequent writes to the specified range shall not fail due to the lack
+     * of free space on the file system storage media.
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
+     * future fallocate() requests will fail with EOPNOTSUPP without being
+     * sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param offset starting point for allocated region
+     * @param length size of allocated region
+     * @param mode determines the operation to be performed on the given range,
+     *             see fallocate(2)
+     */
+    void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
+                      off_t length, struct fuse_file_info *fi);
+
+    /**
+     * Read directory with attributes
+     *
+     * Send a buffer filled using fuse_add_direntry_plus(), with size not
+     * exceeding the requested size.  Send an empty buffer on end of
+     * stream.
+     *
+     * fi->fh will contain the value set by the opendir method, or
+     * will be undefined if the opendir method didn't set any value.
+     *
+     * In contrast to readdir() (which does not affect the lookup counts),
+     * the lookup count of every entry returned by readdirplus(), except "."
+     * and "..", is incremented by one.
+     *
+     * Valid replies:
+     *   fuse_reply_buf
+     *   fuse_reply_data
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param size maximum number of bytes to send
+     * @param off offset to continue reading the directory stream
+     * @param fi file information
+     */
+    void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
+                        struct fuse_file_info *fi);
+
+    /**
+     * Copy a range of data from one file to another
+     *
+     * Performs an optimized copy between two file descriptors without the
+     * additional cost of transferring data through the FUSE kernel module
+     * to user space (glibc) and then back into the FUSE filesystem again.
+     *
+     * In case this method is not implemented, glibc falls back to reading
+     * data from the source and writing to the destination, effectively
+     * doing an inefficient copy of the data.
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure with error code EOPNOTSUPP, i.e. all
+     * future copy_file_range() requests will fail with EOPNOTSUPP without
+     * being sent to the filesystem process.
+     *
+     * Valid replies:
+     *   fuse_reply_write
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino_in the inode number of the source file
+     * @param off_in starting point from where the data should be read
+     * @param fi_in file information of the source file
+     * @param ino_out the inode number of the destination file
+     * @param off_out starting point where the data should be written
+     * @param fi_out file information of the destination file
+     * @param len maximum size of the data to copy
+     * @param flags passed along with the copy_file_range() syscall
+     */
+    void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
+                            struct fuse_file_info *fi_in, fuse_ino_t ino_out,
+                            off_t off_out, struct fuse_file_info *fi_out,
+                            size_t len, int flags);
+
+    /**
+     * Find next data or hole after the specified offset
+     *
+     * If this request is answered with an error code of ENOSYS, this is
+     * treated as a permanent failure, i.e. all future lseek() requests will
+     * fail with the same error code without being sent to the filesystem
+     * process.
+     *
+     * Valid replies:
+     *   fuse_reply_lseek
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param off offset to start search from
+     * @param whence either SEEK_DATA or SEEK_HOLE
+     * @param fi file information
+     */
+    void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
+                  struct fuse_file_info *fi);
+};
+
+/**
+ * Reply with an error code or success.
+ *
+ * Possible requests:
+ *   all except forget
+ *
+ * Wherever possible, error codes should be chosen from the list of
+ * documented error conditions in the corresponding system call's
+ * manpage.
+ *
+ * An error code of ENOSYS is sometimes treated specially. This is
+ * indicated in the documentation of the affected handler functions.
+ *
+ * The following requests may be answered with a zero error code:
+ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr,
+ * removexattr, setlk.
+ *
+ * @param req request handle
+ * @param err the positive error value, or zero for success
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_err(fuse_req_t req, int err);
+
+/**
+ * Don't send reply
+ *
+ * Possible requests:
+ *   forget
+ *   forget_multi
+ *   retrieve_reply
+ *
+ * @param req request handle
+ */
+void fuse_reply_none(fuse_req_t req);
+
+/**
+ * Reply with a directory entry
+ *
+ * Possible requests:
+ *   lookup, mknod, mkdir, symlink, link
+ *
+ * Side effects:
+ *   increments the lookup count on success
+ *
+ * @param req request handle
+ * @param e the entry parameters
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e);
+
+/**
+ * Reply with a directory entry and open parameters
+ *
+ * currently the following members of 'fi' are used:
+ *   fh, direct_io, keep_cache
+ *
+ * Possible requests:
+ *   create
+ *
+ * Side effects:
+ *   increments the lookup count on success
+ *
+ * @param req request handle
+ * @param e the entry parameters
+ * @param fi file information
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
+                      const struct fuse_file_info *fi);
+
+/**
+ * Reply with attributes
+ *
+ * Possible requests:
+ *   getattr, setattr
+ *
+ * @param req request handle
+ * @param attr the attributes
+ * @param attr_timeout validity timeout (in seconds) for the attributes
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
+                    double attr_timeout);
+
+/**
+ * Reply with the contents of a symbolic link
+ *
+ * Possible requests:
+ *   readlink
+ *
+ * @param req request handle
+ * @param link symbolic link contents
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_readlink(fuse_req_t req, const char *link);
+
+/**
+ * Reply with open parameters
+ *
+ * currently the following members of 'fi' are used:
+ *   fh, direct_io, keep_cache
+ *
+ * Possible requests:
+ *   open, opendir
+ *
+ * @param req request handle
+ * @param fi file information
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi);
+
+/**
+ * Reply with number of bytes written
+ *
+ * Possible requests:
+ *   write
+ *
+ * @param req request handle
+ * @param count the number of bytes written
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_write(fuse_req_t req, size_t count);
+
+/**
+ * Reply with data
+ *
+ * Possible requests:
+ *   read, readdir, getxattr, listxattr
+ *
+ * @param req request handle
+ * @param buf buffer containing data
+ * @param size the size of data in bytes
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size);
+
+/**
+ * Reply with data copied/moved from buffer(s)
+ *
+ * Possible requests:
+ *   read, readdir, getxattr, listxattr
+ *
+ * Side effects:
+ *   when used to return data from a readdirplus() (but not readdir())
+ *   call, increments the lookup count of each returned entry by one
+ *   on success.
+ *
+ * @param req request handle
+ * @param bufv buffer vector
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv);
+
+/**
+ * Reply with data vector
+ *
+ * Possible requests:
+ *   read, readdir, getxattr, listxattr
+ *
+ * @param req request handle
+ * @param iov the vector containing the data
+ * @param count the size of vector
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count);
+
+/**
+ * Reply with filesystem statistics
+ *
+ * Possible requests:
+ *   statfs
+ *
+ * @param req request handle
+ * @param stbuf filesystem statistics
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf);
+
+/**
+ * Reply with needed buffer size
+ *
+ * Possible requests:
+ *   getxattr, listxattr
+ *
+ * @param req request handle
+ * @param count the buffer size needed in bytes
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_xattr(fuse_req_t req, size_t count);
+
+/**
+ * Reply with file lock information
+ *
+ * Possible requests:
+ *   getlk
+ *
+ * @param req request handle
+ * @param lock the lock information
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_lock(fuse_req_t req, const struct flock *lock);
+
+/**
+ * Reply with block index
+ *
+ * Possible requests:
+ *   bmap
+ *
+ * @param req request handle
+ * @param idx block index within device
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_bmap(fuse_req_t req, uint64_t idx);
+
+/*
+ * Filling a buffer in readdir
+ */
+
+/**
+ * Add a directory entry to the buffer
+ *
+ * Buffer needs to be large enough to hold the entry.  If it's not,
+ * then the entry is not filled in but the size of the entry is still
+ * returned.  The caller can check this by comparing the bufsize
+ * parameter with the returned entry size.  If the entry size is
+ * larger than the buffer size, the operation failed.
+ *
+ * From the 'stbuf' argument the st_ino field and bits 12-15 of the
+ * st_mode field are used.  The other fields are ignored.
+ *
+ * *off* should be any non-zero value that the filesystem can use to
+ * identify the current point in the directory stream. It does not
+ * need to be the actual physical position. A value of zero is
+ * reserved to mean "from the beginning", and should therefore never
+ * be used (the first call to fuse_add_direntry should be passed the
+ * offset of the second directory entry).
+ *
+ * @param req request handle
+ * @param buf the point where the new entry will be added to the buffer
+ * @param bufsize remaining size of the buffer
+ * @param name the name of the entry
+ * @param stbuf the file attributes
+ * @param off the offset of the next entry
+ * @return the space needed for the entry
+ */
+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
+                         const char *name, const struct stat *stbuf, off_t off);
+
+/**
+ * Add a directory entry to the buffer with the attributes
+ *
+ * See documentation of `fuse_add_direntry()` for more details.
+ *
+ * @param req request handle
+ * @param buf the point where the new entry will be added to the buffer
+ * @param bufsize remaining size of the buffer
+ * @param name the name of the entry
+ * @param e the directory entry
+ * @param off the offset of the next entry
+ * @return the space needed for the entry
+ */
+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
+                              const char *name,
+                              const struct fuse_entry_param *e, off_t off);
+
+/**
+ * Reply to ask for data fetch and output buffer preparation.  ioctl
+ * will be retried with the specified input data fetched and output
+ * buffer prepared.
+ *
+ * Possible requests:
+ *   ioctl
+ *
+ * @param req request handle
+ * @param in_iov iovec specifying data to fetch from the caller
+ * @param in_count number of entries in in_iov
+ * @param out_iov iovec specifying addresses to write output to
+ * @param out_count number of entries in out_iov
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
+                           size_t in_count, const struct iovec *out_iov,
+                           size_t out_count);
+
+/**
+ * Reply to finish ioctl
+ *
+ * Possible requests:
+ *   ioctl
+ *
+ * @param req request handle
+ * @param result result to be passed to the caller
+ * @param buf buffer containing output data
+ * @param size length of output data
+ */
+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size);
+
+/**
+ * Reply to finish ioctl with iov buffer
+ *
+ * Possible requests:
+ *   ioctl
+ *
+ * @param req request handle
+ * @param result result to be passed to the caller
+ * @param iov the vector containing the data
+ * @param count the size of vector
+ */
+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
+                         int count);
+
+/**
+ * Reply with poll result event mask
+ *
+ * @param req request handle
+ * @param revents poll result event mask
+ */
+int fuse_reply_poll(fuse_req_t req, unsigned revents);
+
+/**
+ * Reply with offset
+ *
+ * Possible requests:
+ *   lseek
+ *
+ * @param req request handle
+ * @param off offset of next data or hole
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_lseek(fuse_req_t req, off_t off);
+
+/*
+ * Notification
+ */
+
+/**
+ * Notify IO readiness event
+ *
+ * For more information, please read comment for poll operation.
+ *
+ * @param ph poll handle to notify IO readiness event for
+ */
+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph);
+
+/**
+ * Notify to invalidate cache for an inode.
+ *
+ * Added in FUSE protocol version 7.12. If the kernel does not support
+ * this (or a newer) version, the function will return -ENOSYS and do
+ * nothing.
+ *
+ * If the filesystem has writeback caching enabled, invalidating an
+ * inode will first trigger a writeback of all dirty pages. The call
+ * will block until all writeback requests have completed and the
+ * inode has been invalidated. It will, however, not wait for
+ * completion of pending writeback requests that have been issued
+ * before.
+ *
+ * If there are no dirty pages, this function will never block.
+ *
+ * @param se the session object
+ * @param ino the inode number
+ * @param off the offset in the inode where to start invalidating
+ *            or negative to invalidate attributes only
+ * @param len the amount of cache to invalidate or 0 for all
+ * @return zero for success, -errno for failure
+ */
+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
+                                     off_t off, off_t len);
+
+/**
+ * Notify to invalidate parent attributes and the dentry matching
+ * parent/name
+ *
+ * To avoid a deadlock this function must not be called in the
+ * execution path of a related filesystem operation or within any code
+ * that could hold a lock that could be needed to execute such an
+ * operation. As of kernel 4.18, a "related operation" is a lookup(),
+ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create()
+ * request for the parent, and a setattr(), unlink(), rmdir(),
+ * rename(), setxattr(), removexattr(), readdir() or readdirplus()
+ * request for the inode itself.
+ *
+ * When called correctly, this function will never block.
+ *
+ * Added in FUSE protocol version 7.12. If the kernel does not support
+ * this (or a newer) version, the function will return -ENOSYS and do
+ * nothing.
+ *
+ * @param se the session object
+ * @param parent inode number
+ * @param name file name
+ * @param namelen strlen() of file name
+ * @return zero for success, -errno for failure
+ */
+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
+                                     const char *name, size_t namelen);
+
+/**
+ * This function behaves like fuse_lowlevel_notify_inval_entry() with
+ * the following additional effect (at least as of Linux kernel 4.8):
+ *
+ * If the provided *child* inode matches the inode that is currently
+ * associated with the cached dentry, and if there are any inotify
+ * watches registered for the dentry, then the watchers are informed
+ * that the dentry has been deleted.
+ *
+ * To avoid a deadlock this function must not be called while
+ * executing a related filesystem operation or while holding a lock
+ * that could be needed to execute such an operation (see the
+ * description of fuse_lowlevel_notify_inval_entry() for more
+ * details).
+ *
+ * When called correctly, this function will never block.
+ *
+ * Added in FUSE protocol version 7.18. If the kernel does not support
+ * this (or a newer) version, the function will return -ENOSYS and do
+ * nothing.
+ *
+ * @param se the session object
+ * @param parent inode number
+ * @param child inode number
+ * @param name file name
+ * @param namelen strlen() of file name
+ * @return zero for success, -errno for failure
+ */
+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
+                                fuse_ino_t child, const char *name,
+                                size_t namelen);
+
+/**
+ * Store data to the kernel buffers
+ *
+ * Synchronously store data in the kernel buffers belonging to the
+ * given inode.  The stored data is marked up-to-date (no read will be
+ * performed against it, unless it's invalidated or evicted from the
+ * cache).
+ *
+ * If the stored data overflows the current file size, then the size
+ * is extended, similarly to a write(2) on the filesystem.
+ *
+ * If this function returns an error, then the store wasn't fully
+ * completed, but it may have been partially completed.
+ *
+ * Added in FUSE protocol version 7.15. If the kernel does not support
+ * this (or a newer) version, the function will return -ENOSYS and do
+ * nothing.
+ *
+ * @param se the session object
+ * @param ino the inode number
+ * @param offset the starting offset into the file to store to
+ * @param bufv buffer vector
+ * @return zero for success, -errno for failure
+ */
+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
+                               off_t offset, struct fuse_bufvec *bufv);
+
+/*
+ * Utility functions
+ */
+
+/**
+ * Get the userdata from the request
+ *
+ * @param req request handle
+ * @return the user data passed to fuse_session_new()
+ */
+void *fuse_req_userdata(fuse_req_t req);
+
+/**
+ * Get the context from the request
+ *
+ * The pointer returned by this function will only be valid for the
+ * request's lifetime
+ *
+ * @param req request handle
+ * @return the context structure
+ */
+const struct fuse_ctx *fuse_req_ctx(fuse_req_t req);
+
+/**
+ * Get the current supplementary group IDs for the specified request
+ *
+ * Similar to the getgroups(2) system call, except the return value is
+ * always the total number of group IDs, even if it is larger than the
+ * specified size.
+ *
+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass
+ * the group list to userspace, hence this function needs to parse
+ * "/proc/$TID/task/$TID/status" to get the group IDs.
+ *
+ * This feature may not be supported on all operating systems.  In
+ * such a case this function will return -ENOSYS.
+ *
+ * @param req request handle
+ * @param size size of given array
+ * @param list array of group IDs to be filled in
+ * @return the total number of supplementary group IDs or -errno on failure
+ */
+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]);
+
+/**
+ * Callback function for an interrupt
+ *
+ * @param req interrupted request
+ * @param data user data
+ */
+typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data);
+
+/**
+ * Register/unregister callback for an interrupt
+ *
+ * If an interrupt has already happened, then the callback function is
+ * called from within this function, hence it's not possible for
+ * interrupts to be lost.
+ *
+ * @param req request handle
+ * @param func the callback function or NULL for unregister
+ * @param data user data passed to the callback function
+ */
+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
+                             void *data);
+
+/**
+ * Check if a request has already been interrupted
+ *
+ * @param req request handle
+ * @return 1 if the request has been interrupted, 0 otherwise
+ */
+int fuse_req_interrupted(fuse_req_t req);
+
+/**
+ * Check if the session is connected via virtio
+ *
+ * @param se session object
+ * @return 1 if the session is a virtio session
+ */
+int fuse_lowlevel_is_virtio(struct fuse_session *se);
+
+/*
+ * Inquiry functions
+ */
+
+/**
+ * Print low-level version information to stdout.
+ */
+void fuse_lowlevel_version(void);
+
+/**
+ * Print available low-level options to stdout. This is not an
+ * exhaustive list, but includes only those options that may be of
+ * interest to an end-user of a file system.
+ */
+void fuse_lowlevel_help(void);
+
+/**
+ * Print available options for `fuse_parse_cmdline()`.
+ */
+void fuse_cmdline_help(void);
+
+/*
+ * Filesystem setup & teardown
+ */
+
+struct fuse_cmdline_opts {
+    int foreground;
+    int debug;
+    int nodefault_subtype;
+    int show_version;
+    int show_help;
+    int print_capabilities;
+    int syslog;
+    int log_level;
+    unsigned int max_idle_threads;
+};
+
+/**
+ * Utility function to parse common options for simple file systems
+ * using the low-level API. A help text that describes the available
+ * options can be printed with `fuse_cmdline_help`. A single
+ * non-option argument is treated as the mountpoint. Multiple
+ * non-option arguments will result in an error.
+ *
+ * If neither -o subtype= or -o fsname= options are given, a new
+ * subtype option will be added and set to the basename of the program
+ * (the fsname will remain unset, and then defaults to "fuse").
+ *
+ * Known options will be removed from *args*, unknown options will
+ * remain.
+ *
+ * @param args argument vector (input+output)
+ * @param opts output argument for parsed options
+ * @return 0 on success, -1 on failure
+ */
+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts);
+
+/**
+ * Create a low level session.
+ *
+ * Returns a session structure suitable for passing to
+ * fuse_session_mount() and fuse_session_loop().
+ *
+ * This function accepts most file-system independent mount options
+ * (like context, nodev, ro - see mount(8)), as well as the general
+ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and
+ * -o default_permissions, but not ``-o use_ino``).  Instead of `-o
+ * debug`, debugging may also be enabled with `-d` or `--debug`.
+ *
+ * If not all options are known, an error message is written to stderr
+ * and the function returns NULL.
+ *
+ * Option parsing skips argv[0], which is assumed to contain the
+ * program name. To prevent accidentally passing an option in
+ * argv[0], this element must always be present (even if no options
+ * are specified). It may be set to the empty string ('\0') if no
+ * reasonable value can be provided.
+ *
+ * @param args argument vector
+ * @param op the (low-level) filesystem operations
+ * @param op_size sizeof(struct fuse_lowlevel_ops)
+ * @param userdata user data
+ *
+ * @return the fuse session on success, NULL on failure
+ **/
+struct fuse_session *fuse_session_new(struct fuse_args *args,
+                                      const struct fuse_lowlevel_ops *op,
+                                      size_t op_size, void *userdata);
+
+/**
+ * Mount a FUSE file system.
+ *
+ * @param se session object
+ *
+ * @return 0 on success, -1 on failure.
+ **/
+int fuse_session_mount(struct fuse_session *se);
+
+/**
+ * Enter a single threaded, blocking event loop.
+ *
+ * When the event loop terminates because the connection to the FUSE
+ * kernel module has been closed, this function returns zero. This
+ * happens when the filesystem is unmounted regularly (by the
+ * filesystem owner or root running the umount(8) or fusermount(1)
+ * command), or if connection is explicitly severed by writing ``1``
+ * to the ``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only
+ * way to distinguish between these two conditions is to check if the
+ * filesystem is still mounted after the session loop returns.
+ *
+ * When some error occurs during request processing, the function
+ * returns a negated errno(3) value.
+ *
+ * If the loop has been terminated because of a signal handler
+ * installed by fuse_set_signal_handlers(), this function returns the
+ * (positive) signal value that triggered the exit.
+ *
+ * @param se the session
+ * @return 0, -errno, or a signal value
+ */
+int fuse_session_loop(struct fuse_session *se);
+
+/**
+ * Flag a session as terminated.
+ *
+ * This function is invoked by the POSIX signal handlers, when
+ * registered using fuse_set_signal_handlers(). It will cause any
+ * running event loops to terminate on the next opportunity.
+ *
+ * @param se the session
+ */
+void fuse_session_exit(struct fuse_session *se);
+
+/**
+ * Reset the terminated flag of a session
+ *
+ * @param se the session
+ */
+void fuse_session_reset(struct fuse_session *se);
+
+/**
+ * Query the terminated flag of a session
+ *
+ * @param se the session
+ * @return 1 if exited, 0 if not exited
+ */
+int fuse_session_exited(struct fuse_session *se);
+
+/**
+ * Ensure that file system is unmounted.
+ *
+ * In regular operation, the file system is typically unmounted by the
+ * user calling umount(8) or fusermount(1), which then terminates the
+ * FUSE session loop. However, the session loop may also terminate as
+ * a result of an explicit call to fuse_session_exit() (e.g. by a
+ * signal handler installed by fuse_set_signal_handlers()). In this
+ * case the filesystem remains mounted, but any attempt to access it
+ * will block (while the filesystem process is still running) or give
+ * an ESHUTDOWN error (after the filesystem process has terminated).
+ *
+ * If the communication channel with the FUSE kernel module is still
+ * open (i.e., if the session loop was terminated by an explicit call
+ * to fuse_session_exit()), this function will close it and unmount
+ * the filesystem. If the communication channel has been closed by the
+ * kernel, this method will do (almost) nothing.
+ *
+ * NOTE: The above semantics mean that if the connection to the kernel
+ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file,
+ * this method will *not* unmount the filesystem.
+ *
+ * @param se the session
+ */
+void fuse_session_unmount(struct fuse_session *se);
+
+/**
+ * Destroy a session
+ *
+ * @param se the session
+ */
+void fuse_session_destroy(struct fuse_session *se);
+
+/*
+ * Custom event loop support
+ */
+
+/**
+ * Return file descriptor for communication with kernel.
+ *
+ * The file descriptor can be used to integrate FUSE with a custom event
+ * loop. Whenever data is available for reading on the provided fd,
+ * the event loop should call `fuse_session_receive_buf` followed by
+ * `fuse_session_process_buf` to process the request.
+ *
+ * The returned file descriptor is valid until `fuse_session_unmount`
+ * is called.
+ *
+ * @param se the session
+ * @return a file descriptor
+ */
+int fuse_session_fd(struct fuse_session *se);
+
+/**
+ * Process a raw request supplied in a generic buffer
+ *
+ * The fuse_buf may contain a memory buffer or a pipe file descriptor.
+ *
+ * @param se the session
+ * @param buf the fuse_buf containing the request
+ */
+void fuse_session_process_buf(struct fuse_session *se,
+                              const struct fuse_buf *buf);
+
+/**
+ * Read a raw request from the kernel into the supplied buffer.
+ *
+ * Depending on file system options, system capabilities, and request
+ * size the request is either read into a memory buffer or spliced
+ * into a temporary pipe.
+ *
+ * @param se the session
+ * @param buf the fuse_buf to store the request in
+ * @return the actual size of the raw request, or -errno on error
+ */
+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf);
+
+#endif /* FUSE_LOWLEVEL_H_ */
diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h
new file mode 100644
index 0000000000..5c618ce21f
--- /dev/null
+++ b/tools/virtiofsd/fuse_misc.h
@@ -0,0 +1,60 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include <pthread.h>
+#include "config-host.h"
+
+/*
+ * Versioned symbols cannot be used in some cases because they
+ *   - confuse the dynamic linker in uClibc
+ *   - are not supported on MacOSX (in MachO binary format)
+ */
+#if (!defined(__UCLIBC__) && !defined(__APPLE__))
+#define FUSE_SYMVER(x) __asm__(x)
+#else
+#define FUSE_SYMVER(x)
+#endif
+
+#ifndef USE_UCLIBC
+#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL)
+#else
+/* Is this hack still needed? */
+static inline void fuse_mutex_init(pthread_mutex_t *mut)
+{
+    pthread_mutexattr_t attr;
+    pthread_mutexattr_init(&attr);
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+    pthread_mutex_init(mut, &attr);
+    pthread_mutexattr_destroy(&attr);
+}
+#endif
+
+#ifdef HAVE_STRUCT_STAT_ST_ATIM
+/* Linux */
+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val)
+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val)
+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val)
+#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC)
+/* FreeBSD */
+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val)
+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val)
+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val)
+#else
+#define ST_ATIM_NSEC(stbuf) 0
+#define ST_CTIM_NSEC(stbuf) 0
+#define ST_MTIM_NSEC(stbuf) 0
+#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0)
+#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0)
+#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0)
+#endif
diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c
new file mode 100644
index 0000000000..28922361a2
--- /dev/null
+++ b/tools/virtiofsd/fuse_opt.c
@@ -0,0 +1,450 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * Implementation of option parsing routines (dealing with `struct
+ * fuse_args`).
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "qemu/osdep.h"
+#include "fuse_opt.h"
+#include "fuse_i.h"
+#include "fuse_misc.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct fuse_opt_context {
+    void *data;                 /* user data: proc argument, offset base */
+    const struct fuse_opt *opt; /* option templates to match against */
+    fuse_opt_proc_t proc;       /* optional processing callback */
+    int argctr;                 /* index of the argv entry being parsed */
+    int argc;                   /* number of input arguments */
+    char **argv;                /* input argument vector */
+    struct fuse_args outargs;   /* arguments kept for the caller */
+    char *opts;                 /* kept "-o" options, comma separated */
+    int nonopt;                 /* outargs.argc right after "--", else 0 */
+};
+
+/* Free argv and its strings when allocated, then reset *args to empty. */
+void fuse_opt_free_args(struct fuse_args *args)
+{
+    if (!args) {
+        return;
+    }
+
+    if (args->allocated && args->argv) {
+        for (int n = 0; n < args->argc; n++) {
+            free(args->argv[n]);
+        }
+        free(args->argv);
+    }
+    *args = (struct fuse_args){ .argc = 0, .argv = NULL, .allocated = 0 };
+}
+
+static int alloc_failed(void)
+{
+    fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
+    return -1;
+}
+
+/* Append a private copy of arg to args->argv, keeping it NULL terminated. */
+int fuse_opt_add_arg(struct fuse_args *args, const char *arg)
+{
+    char **grown;
+    char *copy;
+
+    assert(!args->argv || args->allocated);
+    copy = strdup(arg);
+    if (!copy) {
+        return alloc_failed();
+    }
+    /* Room for the new entry plus the terminating NULL */
+    grown = realloc(args->argv, (args->argc + 2) * sizeof(*grown));
+    if (!grown) {
+        free(copy);
+        return alloc_failed();
+    }
+    grown[args->argc] = copy;
+    grown[args->argc + 1] = NULL;
+    args->argv = grown;
+    args->argc++;
+    args->allocated = 1;
+    return 0;
+}
+
+/* Append arg, then move it from the tail of argv down to index pos. */
+static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos,
+                                      const char *arg)
+{
+    assert(pos <= args->argc);
+    if (fuse_opt_add_arg(args, arg) == -1) {
+        return -1;
+    }
+    if (pos != args->argc - 1) {
+        char **slot = &args->argv[pos];
+        char *added = args->argv[args->argc - 1];
+        memmove(slot + 1, slot, sizeof(*slot) * (args->argc - 1 - pos));
+        *slot = added;
+    }
+    return 0;
+}
+
+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg)
+{
+    return fuse_opt_insert_arg_common(args, pos, arg);
+}
+
+static int next_arg(struct fuse_opt_context *ctx, const char *opt)
+{
+    if (ctx->argctr + 1 < ctx->argc) {
+        ctx->argctr++; /* step onto the argument that follows opt */
+        return 0;
+    }
+    fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt);
+    return -1;
+}
+
+static int add_arg(struct fuse_opt_context *ctx, const char *arg)
+{
+    return fuse_opt_add_arg(&ctx->outargs, arg);
+}
+
+/* Append opt to the list *opts, escaping ',' and backslash if esc is set. */
+static int add_opt_common(char **opts, const char *opt, int esc)
+{
+    /* size_t, not unsigned: do not truncate the length of a long list */
+    size_t oldlen = *opts ? strlen(*opts) : 0;
+    /* Worst case: separator, every character escaped, terminator */
+    char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1);
+
+    if (!d) {
+        return alloc_failed();
+    }
+    *opts = d;
+    if (oldlen) {
+        d += oldlen;
+        *d++ = ',';
+    }
+    for (; *opt; opt++) {
+        if (esc && (*opt == ',' || *opt == '\\')) {
+            *d++ = '\\';
+        }
+        *d++ = *opt;
+    }
+    *d = '\0';
+    return 0;
+}
+
+int fuse_opt_add_opt(char **opts, const char *opt)
+{
+    return add_opt_common(opts, opt, 0);
+}
+
+int fuse_opt_add_opt_escaped(char **opts, const char *opt)
+{
+    return add_opt_common(opts, opt, 1);
+}
+
+static int add_opt(struct fuse_opt_context *ctx, const char *opt)
+{
+    return add_opt_common(&ctx->opts, opt, 1);
+}
+
+/* Let proc decide whether arg is kept; kept args go to opts or outargs. */
+static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key,
+                     int iso)
+{
+    if (key == FUSE_OPT_KEY_DISCARD) {
+        return 0;
+    }
+
+    if (ctx->proc && key != FUSE_OPT_KEY_KEEP) {
+        int verdict = ctx->proc(ctx->data, arg, key, &ctx->outargs);
+
+        /* 0 discards the argument, -1 reports an error */
+        if (verdict == 0 || verdict == -1) {
+            return verdict;
+        }
+    }
+    /* "-o" sub-options are collected in ctx->opts, the rest in outargs */
+    return iso ? add_opt(ctx, arg) : add_arg(ctx, arg);
+}
+
+/* Match arg against template t; on success store the separator offset. */
+static int match_template(const char *t, const char *arg, unsigned *sepp)
+{
+    const char *sep = strchr(t, '=');
+
+    sep = sep ? sep : strchr(t, ' ');
+    if (sep && (sep[1] == '\0' || sep[1] == '%')) {
+        /* Lengths stay size_t; '=' belongs to the prefix, ' ' does not */
+        size_t tlen = (size_t)(sep - t) + (*sep == '=');
+
+        if (strlen(arg) >= tlen && strncmp(arg, t, tlen) == 0) {
+            *sepp = sep - t;
+            return 1;
+        }
+    }
+    if (strcmp(t, arg) == 0) {
+        *sepp = 0;
+        return 1;
+    }
+    return 0;
+}
+
+/* Return the first entry at or after opt whose template matches arg. */
+static const struct fuse_opt *find_opt(const struct fuse_opt *opt,
+                                       const char *arg, unsigned *sepp)
+{
+    while (opt && opt->templ && !match_template(opt->templ, arg, sepp)) {
+        opt++;
+    }
+
+    return (opt && opt->templ) ? opt : NULL;
+}
+
+int fuse_opt_match(const struct fuse_opt *opts, const char *opt)
+{
+    unsigned dummy;
+    return find_opt(opts, opt, &dummy) ? 1 : 0;
+}
+
+/* Store param into *var: "%s" duplicates it, other formats use sscanf(). */
+static int process_opt_param(void *var, const char *format, const char *param,
+                             const char *arg)
+{
+    assert(format[0] == '%');
+    if (format[1] == 's') {
+        char **strp = var;
+        char *fresh = strdup(param);
+
+        if (!fresh) {
+            return alloc_failed();
+        }
+        free(*strp);
+        *strp = fresh;
+        return 0;
+    }
+    if (sscanf(param, format, var) == 1) {
+        return 0;
+    }
+    fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg);
+    return -1;
+}
+
+static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt,
+                       unsigned sep, const char *arg, int iso)
+{
+    void *var;
+
+    /*
+     * 'offset' is an unsigned long: FUSE_OPT_KEY() stores -1U in it, while
+     * the documented plain -1 becomes ULONG_MAX.  Both mean "call proc".
+     */
+    if (opt->offset == -1U || opt->offset == -1UL) {
+        return call_proc(ctx, arg, opt->value, iso) == -1 ? -1 : 0;
+    }
+    var = (char *)ctx->data + opt->offset;
+    if (sep && opt->templ[sep + 1]) {
+        const char *param = arg + sep + (opt->templ[sep] == '=');
+
+        if (process_opt_param(var, opt->templ + sep + 1, param, arg) == -1) {
+            return -1;
+        }
+    } else {
+        *(int *)var = opt->value;
+    }
+    return 0;
+}
+
+/* Join a "-x param" pair into "-xparam" and process it as one option. */
+static int process_opt_sep_arg(struct fuse_opt_context *ctx,
+                               const struct fuse_opt *opt, unsigned sep,
+                               const char *arg, int iso)
+{
+    const char *value;
+    char *joined;
+    size_t vlen;
+    int rc;
+
+    if (next_arg(ctx, arg) == -1) {
+        return -1;
+    }
+    value = ctx->argv[ctx->argctr];
+    vlen = strlen(value);
+    joined = malloc(sep + vlen + 1);
+    if (!joined) {
+        return alloc_failed();
+    }
+    memcpy(joined, arg, sep);
+    memcpy(joined + sep, value, vlen + 1);
+    rc = process_opt(ctx, opt, sep, joined, iso);
+    free(joined);
+    return rc;
+}
+
+/* Run every matching template on arg; unmatched options go to proc. */
+static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso)
+{
+    unsigned sep;
+    const struct fuse_opt *hit = find_opt(ctx->opt, arg, &sep);
+
+    if (!hit) {
+        return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso);
+    }
+    do {
+        /* "-x " template, bare "-x" given: the parameter is the next argv */
+        int split = sep && hit->templ[sep] == ' ' && !arg[sep];
+        int rc = split ? process_opt_sep_arg(ctx, hit, sep, arg, iso)
+                       : process_opt(ctx, hit, sep, arg, iso);
+        if (rc == -1) {
+            return -1;
+        }
+        hit = find_opt(hit + 1, arg, &sep);
+    } while (hit);
+    return 0;
+}
+
+static int process_real_option_group(struct fuse_opt_context *ctx, char *opts)
+{
+    char *s = opts; /* read position in the raw, still escaped list */
+    char *d = s;    /* write position; unescaping happens in place */
+    int end = 0;
+    /* Split opts at every unescaped ',' and process each piece in turn */
+    while (!end) {
+        if (*s == '\0') {
+            end = 1;
+        }
+        if (*s == ',' || end) {
+            int res;
+            /* Terminate the unescaped option collected at opts */
+            *d = '\0';
+            res = process_gopt(ctx, opts, 1);
+            if (res == -1) {
+                return -1;
+            }
+            d = opts; /* the next option reuses the start of the buffer */
+        } else {
+            if (s[0] == '\\' && s[1] != '\0') {
+                s++; /* drop the backslash, keep what it escapes */
+                if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' &&
+                    s[2] >= '0' && s[2] <= '7') {
+                    *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 +
+                           (s[2] - '0');
+                    s += 2; /* \NNN: last digit skipped by s++ below */
+                } else {
+                    *d++ = *s;
+                }
+            } else {
+                *d++ = *s;
+            }
+        }
+        s++;
+    }
+
+    return 0;
+}
+
+/* Parse a comma separated option list on a private, writable copy of opts. */
+static int process_option_group(struct fuse_opt_context *ctx, const char *opts)
+{
+    int res;
+    char *copy = strdup(opts);
+
+    if (!copy) {
+        return alloc_failed();
+    }
+    res = process_real_option_group(ctx, copy);
+    free(copy);
+    return res;
+}
+
+/* Classify one command line argument and dispatch it accordingly. */
+static int process_one(struct fuse_opt_context *ctx, const char *arg)
+{
+    if (ctx->nonopt || arg[0] != '-') {
+        return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0);
+    }
+    if (arg[1] == 'o') { /* "-o<group>", or "-o" with the group as next arg */
+        if (arg[2]) {
+            return process_option_group(ctx, arg + 2);
+        }
+        if (next_arg(ctx, arg) == -1) {
+            return -1;
+        }
+        return process_option_group(ctx, ctx->argv[ctx->argctr]);
+    }
+    if (arg[1] == '-' && !arg[2]) { /* "--": keep it, options end here */
+        if (add_arg(ctx, arg) == -1) {
+            return -1;
+        }
+        ctx->nonopt = ctx->outargs.argc;
+        return 0;
+    }
+    return process_gopt(ctx, arg, 0);
+}
+
+/* Parse ctx->argv into ctx->outargs, folding kept options into one "-o". */
+static int opt_parse(struct fuse_opt_context *ctx)
+{
+    struct fuse_args *out = &ctx->outargs;
+
+    /* argv[0] (the program name) is always kept */
+    if (ctx->argc && add_arg(ctx, ctx->argv[0]) == -1) {
+        return -1;
+    }
+    for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) {
+        if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) {
+            return -1;
+        }
+    }
+    if (ctx->opts) {
+        if (fuse_opt_insert_arg(out, 1, "-o") == -1 ||
+            fuse_opt_insert_arg(out, 2, ctx->opts) == -1) {
+            return -1;
+        }
+        /* The two inserted entries shifted "--" and what follows it */
+        ctx->nonopt += ctx->nonopt ? 2 : 0;
+    }
+    /* If option separator ("--") is the last argument, remove it */
+    if (ctx->nonopt && ctx->nonopt == out->argc &&
+        strcmp(out->argv[out->argc - 1], "--") == 0) {
+        free(out->argv[out->argc - 1]);
+        out->argv[--out->argc] = NULL;
+    }
+    return 0;
+}
+
+/* Parse args against opts/proc and replace *args with the kept arguments. */
+int fuse_opt_parse(struct fuse_args *args, void *data,
+                   const struct fuse_opt opts[], fuse_opt_proc_t proc)
+{
+    struct fuse_opt_context ctx = { .data = data, .opt = opts, .proc = proc };
+    int rc;
+
+    /* A missing or empty argument vector leaves nothing to parse */
+    if (args == NULL || args->argv == NULL || args->argc == 0) {
+        return 0;
+    }
+    ctx.argc = args->argc;
+    ctx.argv = args->argv;
+
+    rc = opt_parse(&ctx);
+    if (rc != -1) {
+        /* Hand the result to the caller; the old vector is freed below */
+        struct fuse_args parsed = ctx.outargs;
+
+        ctx.outargs = *args;
+        *args = parsed;
+    }
+    /* Drops the old vector on success, the partial result on failure */
+    free(ctx.opts);
+    fuse_opt_free_args(&ctx.outargs);
+    return rc;
+}
diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h
new file mode 100644
index 0000000000..8f59b4d301
--- /dev/null
+++ b/tools/virtiofsd/fuse_opt.h
@@ -0,0 +1,272 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+#ifndef FUSE_OPT_H_
+#define FUSE_OPT_H_
+
+/** @file
+ *
+ * This file defines the option parsing interface of FUSE
+ */
+
+/**
+ * Option description
+ *
+ * This structure describes a single option, and action associated
+ * with it, in case it matches.
+ *
+ * More than one such match may occur, in which case the action for
+ * each match is executed.
+ *
+ * There are three possible actions in case of a match:
+ *
+ * i) An integer (int or unsigned) variable determined by 'offset' is
+ *    set to 'value'
+ *
+ * ii) The processing function is called, with 'value' as the key
+ *
+ * iii) An integer (any) or string (char *) variable determined by
+ *    'offset' is set to the value of an option parameter
+ *
+ * 'offset' should normally be either set to
+ *
+ *  - 'offsetof(struct foo, member)'  actions i) and iii)
+ *
+ *  - -1                              action ii)
+ *
+ * The 'offsetof()' macro is defined in the <stddef.h> header.
+ *
+ * The template determines which options match, and also have an
+ * effect on the action.  Normally the action is either i) or ii), but
+ * if a format is present in the template, then action iii) is
+ * performed.
+ *
+ * The types of templates are:
+ *
+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only
+ *   themselves.  Invalid values are "--" and anything beginning
+ *   with "-o"
+ *
+ * 2) "foo", "foo-bar", etc.  These match "-ofoo", "-ofoo-bar" or
+ *    the relevant option in a comma separated option list
+ *
+ * 3) "bar=", "--foo=", etc.  These are variations of 1) and 2)
+ *    which have a parameter
+ *
+ * 4) "bar=%s", "--foo=%lu", etc.  Same matching as above but perform
+ *    action iii).
+ *
+ * 5) "-x ", etc.  Matches either "-xparam" or "-x param" as
+ *    two separate arguments
+ *
+ * 6) "-x %s", etc.  Combination of 4) and 5)
+ *
+ * If the format is "%s", memory is allocated for the string unlike with
+ * scanf().  The previous value (if non-NULL) stored at this location is
+ * freed.
+ */
+struct fuse_opt {
+    /** Matching template and optional parameter formatting */
+    const char *templ;
+
+    /**
+     * Offset of variable within 'data' parameter of fuse_opt_parse()
+     * or -1
+     */
+    unsigned long offset;
+
+    /**
+     * Value to set the variable to, or to be passed as 'key' to the
+     * processing function. Ignored if template has a format
+     */
+    int value;
+};
+
+/**
+ * Key option. In case of a match, the processing function will be
+ * called with the specified key.
+ */
+#define FUSE_OPT_KEY(templ, key) \
+    {                            \
+        templ, -1U, key          \
+    }
+
+/**
+ * Last option. An array of 'struct fuse_opt' must end with a NULL
+ * template value
+ */
+#define FUSE_OPT_END \
+    {                \
+        NULL, 0, 0   \
+    }
+
+/**
+ * Argument list
+ */
+struct fuse_args {
+    /** Argument count */
+    int argc;
+
+    /** Argument vector.  NULL terminated */
+    char **argv;
+
+    /** Is 'argv' allocated? */
+    int allocated;
+};
+
+/**
+ * Initializer for 'struct fuse_args'
+ */
+#define FUSE_ARGS_INIT(argc, argv) \
+    {                              \
+        argc, argv, 0              \
+    }
+
+/**
+ * Key value passed to the processing function if an option did not
+ * match any template
+ */
+#define FUSE_OPT_KEY_OPT -1
+
+/**
+ * Key value passed to the processing function for all non-options
+ *
+ * Non-options are the arguments beginning with a character other than
+ * '-' or all arguments after the special '--' option
+ */
+#define FUSE_OPT_KEY_NONOPT -2
+
+/**
+ * Special key value for options to keep
+ *
+ * Argument is not passed to processing function, but behaves as if the
+ * processing function returned 1
+ */
+#define FUSE_OPT_KEY_KEEP -3
+
+/**
+ * Special key value for options to discard
+ *
+ * Argument is not passed to processing function, but behaves as if the
+ * processing function returned zero
+ */
+#define FUSE_OPT_KEY_DISCARD -4
+
+/**
+ * Processing function
+ *
+ * This function is called if
+ *    - option did not match any 'struct fuse_opt'
+ *    - argument is a non-option
+ *    - option did match and offset was set to -1
+ *
+ * The 'arg' parameter will always contain the whole argument or
+ * option including the parameter if exists.  A two-argument option
+ * ("-x foo") is always converted to single argument option of the
+ * form "-xfoo" before this function is called.
+ *
+ * Options of the form '-ofoo' are passed to this function without the
+ * '-o' prefix.
+ *
+ * The return value of this function determines whether this argument
+ * is to be inserted into the output argument vector, or discarded.
+ *
+ * @param data is the user data passed to the fuse_opt_parse() function
+ * @param arg is the whole argument or option
+ * @param key determines why the processing function was called
+ * @param outargs the current output argument list
+ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept
+ */
+typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key,
+                               struct fuse_args *outargs);
+
+/**
+ * Option parsing function
+ *
+ * If 'args' was returned from a previous call to fuse_opt_parse() or built
+ * with fuse_opt_add_arg(), its old argument vector is freed on success.
+ *
+ * A NULL 'args' is equivalent to an empty argument vector
+ *
+ * A NULL 'opts' is equivalent to an 'opts' array containing a single
+ * end marker
+ *
+ * A NULL 'proc' is equivalent to a processing function always
+ * returning '1'
+ *
+ * @param args is the input and output argument list
+ * @param data is the user data
+ * @param opts is the option description array
+ * @param proc is the processing function
+ * @return -1 on error, 0 on success
+ */
+int fuse_opt_parse(struct fuse_args *args, void *data,
+                   const struct fuse_opt opts[], fuse_opt_proc_t proc);
+
+/**
+ * Add an option to a comma separated option list
+ *
+ * @param opts is a pointer to an option list, may point to a NULL value
+ * @param opt is the option to add
+ * @return -1 on allocation error, 0 on success
+ */
+int fuse_opt_add_opt(char **opts, const char *opt);
+
+/**
+ * Add an option, escaping commas, to a comma separated option list
+ *
+ * @param opts is a pointer to an option list, may point to a NULL value
+ * @param opt is the option to add
+ * @return -1 on allocation error, 0 on success
+ */
+int fuse_opt_add_opt_escaped(char **opts, const char *opt);
+
+/**
+ * Add an argument to a NULL terminated argument vector
+ *
+ * @param args is the structure containing the current argument list
+ * @param arg is the new argument to add
+ * @return -1 on allocation error, 0 on success
+ */
+int fuse_opt_add_arg(struct fuse_args *args, const char *arg);
+
+/**
+ * Add an argument at the specified position in a NULL terminated
+ * argument vector
+ *
+ * Adds the argument to the N-th position.  This is useful for adding
+ * options at the beginning of the array which must not come after the
+ * special '--' option.
+ *
+ * @param args is the structure containing the current argument list
+ * @param pos is the position at which to add the argument
+ * @param arg is the new argument to add
+ * @return -1 on allocation error, 0 on success
+ */
+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg);
+
+/**
+ * Free the contents of argument list
+ *
+ * The structure itself is not freed
+ *
+ * @param args is the structure containing the argument list
+ */
+void fuse_opt_free_args(struct fuse_args *args);
+
+
+/**
+ * Check if an option matches
+ *
+ * @param opts is the option description array
+ * @param opt is the option to match
+ * @return 1 if a match is found, 0 if not
+ */
+int fuse_opt_match(const struct fuse_opt opts[], const char *opt);
+
+#endif /* FUSE_OPT_H_ */
diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c
new file mode 100644
index 0000000000..f18625b6e2
--- /dev/null
+++ b/tools/virtiofsd/fuse_signals.c
@@ -0,0 +1,98 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * Utility functions for setting signal handlers.
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "qemu/osdep.h"
+#include "fuse_i.h"
+#include "fuse_lowlevel.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static struct fuse_session *fuse_instance;
+
+static void exit_handler(int sig)
+{
+    if (fuse_instance) { /* session set by fuse_set_signal_handlers() */
+        fuse_session_exit(fuse_instance);
+        if (sig <= 0) { /* sanity check before the value is stored below */
+            fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n");
+            abort();
+        }
+        fuse_instance->error = sig; /* remember which signal arrived */
+    }
+}
+
+static void do_nothing(int sig)
+{
+    (void)sig; /* intentionally empty: installed in place of SIG_IGN */
+}
+
+static int set_one_signal_handler(int sig, void (*handler)(int), int remove)
+{
+    /* Touch 'sig' only while its disposition is the one we expect to find */
+    void (*expected)(int) = remove ? handler : SIG_DFL;
+    struct sigaction current, wanted;
+
+    if (sigaction(sig, NULL, &current) == -1) {
+        fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n",
+                 strerror(errno));
+        return -1;
+    }
+    if (current.sa_handler != expected) {
+        return 0; /* somebody else's handler: leave it alone */
+    }
+    memset(&wanted, 0, sizeof(wanted));
+    wanted.sa_handler = remove ? SIG_DFL : handler;
+    sigemptyset(&wanted.sa_mask);
+    if (sigaction(sig, &wanted, NULL) == -1) {
+        fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n",
+                 strerror(errno));
+        return -1;
+    }
+    return 0;
+}
+
+int fuse_set_signal_handlers(struct fuse_session *se)
+{
+    /*
+     * Route SIGHUP/SIGINT/SIGTERM to exit_handler.  SIGPIPE gets do_nothing,
+     * not SIG_IGN, so removal can tell our handler from a pre-set SIG_IGN.
+     */
+    static const int exit_sigs[] = { SIGHUP, SIGINT, SIGTERM };
+
+    for (size_t i = 0; i < sizeof(exit_sigs) / sizeof(exit_sigs[0]); i++) {
+        if (set_one_signal_handler(exit_sigs[i], exit_handler, 0) == -1) {
+            return -1;
+        }
+    }
+    if (set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) {
+        return -1;
+    }
+    fuse_instance = se;
+    return 0;
+}
+
+void fuse_remove_signal_handlers(struct fuse_session *se)
+{
+    if (fuse_instance == se) {
+        fuse_instance = NULL; /* exit_handler does nothing from here on */
+    } else {
+        fuse_log(FUSE_LOG_ERR,
+                 "fuse: fuse_remove_signal_handlers: unknown session\n");
+    }
+    /* Back to SIG_DFL wherever our own handler is still the one in place */
+    set_one_signal_handler(SIGHUP, exit_handler, 1);
+    set_one_signal_handler(SIGINT, exit_handler, 1);
+    set_one_signal_handler(SIGTERM, exit_handler, 1);
+    set_one_signal_handler(SIGPIPE, do_nothing, 1);
+}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
new file mode 100644
index 0000000000..80a6e929df
--- /dev/null
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -0,0 +1,986 @@
+/*
+ * virtio-fs glue for FUSE
+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Dave Gilbert  <dgilbert@redhat.com>
+ *
+ * Implements the glue between libfuse and libvhost-user
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qapi/error.h"
+#include "fuse_i.h"
+#include "standard-headers/linux/fuse.h"
+#include "fuse_misc.h"
+#include "fuse_opt.h"
+#include "fuse_virtio.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <glib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "contrib/libvhost-user/libvhost-user.h"
+
+struct fv_VuDev;
+struct fv_QueueInfo {
+    pthread_t thread; /* runs fv_queue_thread() for this queue */
+    /*
+     * This lock protects the VuVirtq preventing races between
+     * fv_queue_thread() and fv_queue_worker().
+     */
+    pthread_mutex_t vq_lock;
+
+    struct fv_VuDev *virtio_dev; /* back-pointer to the owning device */
+
+    /* Our queue index, corresponds to array position */
+    int qidx;
+    int kick_fd; /* copied from dev->vq[qidx].kick_fd at queue start */
+    int kill_fd; /* For killing the thread */
+};
+
+/* A FUSE request; obtained from vu_queue_pop(sizeof(FVRequest)) */
+typedef struct {
+    VuVirtqElement elem; /* presumably must stay first: see vu_queue_pop() */
+    struct fuse_chan ch; /* passed to fuse_session_process_buf_int() */
+
+    /* Used to complete requests that involve no reply */
+    bool reply_sent;
+} FVRequest;
+
+/*
+ * We pass the dev element into libvhost-user
+ * and then use it to get back to the outer
+ * container for other data.
+ */
+struct fv_VuDev {
+    VuDev dev; /* container_of() on this recovers the fv_VuDev */
+    struct fuse_session *se; /* session this device belongs to */
+
+    /*
+     * Either handle virtqueues or vhost-user protocol messages.  Don't do
+     * both at the same time since that could lead to race conditions if
+     * virtqueues or memory tables change while another thread is accessing
+     * them.
+     *
+     * The assumptions are:
+     * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev.
+     * 2. virtio_loop() reads/writes virtqueues and VuDev.
+     */
+    pthread_rwlock_t vu_dispatch_rwlock;
+
+    /*
+     * The following pair of fields are only accessed in the main
+     * virtio_loop
+     */
+    size_t nqueues; /* number of slots in qi[] */
+    struct fv_QueueInfo **qi; /* per-queue state, NULL when not started */
+};
+
+/* Device config layout, from the spec (not referenced in this file) */
+struct virtio_fs_config {
+    char tag[36];
+    uint32_t num_queues;
+};
+
+/* Callback from libvhost-user: VIRTIO_F_VERSION_1 is all we offer */
+static uint64_t fv_get_features(VuDev *dev)
+{
+    return 1ULL << VIRTIO_F_VERSION_1;
+}
+
+/* Callback from libvhost-user: the feature bits are not looked at */
+static void fv_set_features(VuDev *dev, uint64_t features)
+{
+}
+
+/*
+ * Callback from libvhost-user if there's a new fd we're supposed to listen
+ * to, typically a queue kick?  Not implemented: it only logs a warning.
+ */
+static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
+                         void *data)
+{
+    fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
+}
+
+/*
+ * libvhost-user callback to stop listening on an fd; only logs a warning
+ */
+static void fv_remove_watch(VuDev *dev, int fd)
+{
+    fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
+}
+
+/* Callback from libvhost-user to panic: log the error, then exit */
+static void fv_panic(VuDev *dev, const char *err)
+{
+    fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
+    /* TODO: Allow reconnects?? */
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Flatten the first out_num entries of out_sg into buf->mem, back to back.
+ * No bounds checking: the caller guarantees buf->mem is large enough.
+ */
+static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
+                          const struct iovec *out_sg)
+{
+    char *cursor = buf->mem;
+    size_t i;
+
+    for (i = 0; i < out_num; i++) {
+        const struct iovec *seg = &out_sg[i];
+
+        memcpy(cursor, seg->iov_base, seg->iov_len);
+        cursor += seg->iov_len;
+    }
+}
+
+/*
+ * Copy to_copy bytes from the src iov to the dst iov; elements may differ in
+ * size.  The caller must have checked sizes (running out only trips asserts).
+ */
+static void copy_iov(struct iovec *src_iov, int src_count,
+                     struct iovec *dst_iov, int dst_count, size_t to_copy)
+{
+    size_t dst_offset = 0; /* bytes already filled in dst_iov[0] */
+    /* Outer loop copies 'src' elements */
+    while (to_copy) {
+        assert(src_count);
+        size_t src_len = src_iov[0].iov_len;
+        size_t src_offset = 0;
+
+        if (src_len > to_copy) {
+            src_len = to_copy; /* last element: take only what is left */
+        }
+        /* Inner loop copies contents of one 'src' to maybe multiple dst. */
+        while (src_len) {
+            assert(dst_count);
+            size_t dst_len = dst_iov[0].iov_len - dst_offset;
+            if (dst_len > src_len) {
+                dst_len = src_len;
+            }
+
+            memcpy(dst_iov[0].iov_base + dst_offset,
+                   src_iov[0].iov_base + src_offset, dst_len);
+            src_len -= dst_len;
+            to_copy -= dst_len;
+            src_offset += dst_len;
+            dst_offset += dst_len;
+
+            assert(dst_offset <= dst_iov[0].iov_len);
+            if (dst_offset == dst_iov[0].iov_len) {
+                dst_offset = 0; /* this dst element is full, move on */
+                dst_iov++;
+                dst_count--;
+            }
+        }
+        src_iov++;
+        src_count--;
+    }
+}
+
+/*
+ * Called back by ll whenever it wants to send a reply/message back.
+ * The 1st element of the iov starts with the fuse_out_header; 'unique'==0
+ * would mean a notify message.  Returns 0 or -E2BIG (elem too small).
+ */
+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
+                    struct iovec *iov, int count)
+{
+    FVRequest *req = container_of(ch, FVRequest, ch);
+    struct fv_QueueInfo *qi = ch->qi;
+    VuDev *dev = &se->virtio_dev->dev;
+    VuVirtq *q = vu_get_queue(dev, qi->qidx);
+    VuVirtqElement *elem = &req->elem;
+    int ret = 0;
+
+    assert(count >= 1);
+    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
+
+    struct fuse_out_header *out = iov[0].iov_base;
+    /* TODO: Endianness! */
+
+    size_t tosend_len = iov_size(iov, count);
+
+    /* unique == 0 is notification, which we don't support */
+    assert(out->unique);
+    assert(!req->reply_sent);
+
+    /* The 'in' part of the elem is to qemu */
+    unsigned int in_num = elem->in_num;
+    struct iovec *in_sg = elem->in_sg;
+    size_t in_len = iov_size(in_sg, in_num);
+    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
+             __func__, elem->index, in_num, in_len);
+
+    /*
+     * The elem should have room for a 'fuse_out_header' (out from fuse)
+     * plus the data based on the len in the header.
+     */
+    if (in_len < sizeof(struct fuse_out_header)) {
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
+                 __func__, elem->index);
+        ret = -E2BIG;
+        goto err;
+    }
+    if (in_len < tosend_len) {
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
+                 __func__, elem->index, tosend_len);
+        ret = -E2BIG;
+        goto err;
+    }
+
+    copy_iov(iov, count, in_sg, in_num, tosend_len);
+
+    pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
+    pthread_mutex_lock(&qi->vq_lock);
+    vu_queue_push(dev, q, elem, tosend_len);
+    vu_queue_notify(dev, q);
+    pthread_mutex_unlock(&qi->vq_lock);
+    pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
+
+    req->reply_sent = true; /* fv_queue_worker() won't push elem again */
+
+err:
+    return ret;
+}
+
+/*
+ * Callback from fuse_send_data_iov_* when it's virtio and the buffer
+ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK.
+ * We need to send the iov and then the buffer (read here with preadv()).
+ * Returns 0 on success, a positive errno value (E2BIG, EIO, ...) on failure.
+ */
+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
+                         struct iovec *iov, int count, struct fuse_bufvec *buf,
+                         size_t len)
+{
+    FVRequest *req = container_of(ch, FVRequest, ch);
+    struct fv_QueueInfo *qi = ch->qi;
+    VuDev *dev = &se->virtio_dev->dev;
+    VuVirtq *q = vu_get_queue(dev, qi->qidx);
+    VuVirtqElement *elem = &req->elem;
+    int ret = 0;
+
+    assert(count >= 1);
+    assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
+
+    struct fuse_out_header *out = iov[0].iov_base;
+    /* TODO: Endianness! */
+
+    size_t iov_len = iov_size(iov, count);
+    size_t tosend_len = iov_len + len;
+
+    out->len = tosend_len;
+
+    fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
+             count, len, iov_len);
+
+    /* unique == 0 is notification which we don't support */
+    assert(out->unique);
+
+    assert(!req->reply_sent);
+
+    /* The 'in' part of the elem is to qemu */
+    unsigned int in_num = elem->in_num;
+    struct iovec *in_sg = elem->in_sg;
+    size_t in_len = iov_size(in_sg, in_num);
+    fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
+             __func__, elem->index, in_num, in_len);
+
+    /*
+     * The elem should have room for a 'fuse_out_header' (out from fuse)
+     * plus the data based on the len in the header.
+     */
+    if (in_len < sizeof(struct fuse_out_header)) {
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
+                 __func__, elem->index);
+        ret = E2BIG;
+        goto err;
+    }
+    if (in_len < tosend_len) {
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
+                 __func__, elem->index, tosend_len);
+        ret = E2BIG;
+        goto err;
+    }
+
+    /* TODO: Limit to 'len' */
+
+    /* First copy the header data from iov->in_sg */
+    copy_iov(iov, count, in_sg, in_num, iov_len);
+
+    /*
+     * Build a copy of the in_sg iov so we can skip bits in it,
+     * including changing the offsets
+     */
+    struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num);
+    assert(in_sg_cpy);
+    memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
+    /* These get updated as we skip */
+    struct iovec *in_sg_ptr = in_sg_cpy;
+    int in_sg_cpy_count = in_num;
+
+    /* skip over parts of in_sg that contained the header iov */
+    size_t skip_size = iov_len;
+
+    size_t in_sg_left = 0;
+    do {
+        while (skip_size != 0 && in_sg_cpy_count) {
+            if (skip_size >= in_sg_ptr[0].iov_len) {
+                skip_size -= in_sg_ptr[0].iov_len;
+                in_sg_ptr++;
+                in_sg_cpy_count--;
+            } else {
+                in_sg_ptr[0].iov_len -= skip_size;
+                in_sg_ptr[0].iov_base += skip_size;
+                break;
+            }
+        }
+
+        int i;
+        for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) {
+            in_sg_left += in_sg_ptr[i].iov_len;
+        }
+        fuse_log(FUSE_LOG_DEBUG,
+                 "%s: after skip skip_size=%zd in_sg_cpy_count=%d "
+                 "in_sg_left=%zd\n",
+                 __func__, skip_size, in_sg_cpy_count, in_sg_left);
+        ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
+                     buf->buf[0].pos); /* NOTE(review): ssize_t into int */
+
+        if (ret == -1) {
+            ret = errno;
+            fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
+                     __func__, len);
+            free(in_sg_cpy);
+            goto err;
+        }
+        fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
+                 ret, len);
+        if (ret < len && ret) {
+            fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
+            /* Skip over this much next time around */
+            skip_size = ret;
+            buf->buf[0].pos += ret;
+            len -= ret;
+
+            /* Lets do another read */
+            continue; /* jumps to the while (in_sg_left) test below */
+        }
+        if (!ret) {
+            /* EOF case? */
+            fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__,
+                     in_sg_left);
+            break;
+        }
+        if (ret != len) { /* only ret > len can reach this point */
+            fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__);
+            ret = EIO;
+            free(in_sg_cpy);
+            goto err;
+        }
+        in_sg_left -= ret;
+        len -= ret;
+    } while (in_sg_left);
+    free(in_sg_cpy);
+
+    /* Need to fix out->len on EOF */
+    if (len) {
+        struct fuse_out_header *out_sg = in_sg[0].iov_base;
+
+        tosend_len -= len;
+        out_sg->len = tosend_len;
+    }
+
+    ret = 0;
+
+    pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
+    pthread_mutex_lock(&qi->vq_lock);
+    vu_queue_push(dev, q, elem, tosend_len);
+    vu_queue_notify(dev, q);
+    pthread_mutex_unlock(&qi->vq_lock);
+    pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
+
+err:
+    if (ret == 0) {
+        req->reply_sent = true; /* fv_queue_worker() won't push elem again */
+    }
+
+    return ret;
+}
+
+/* Process one FVRequest in a thread pool; 'data' is freed before returning */
+static void fv_queue_worker(gpointer data, gpointer user_data)
+{
+    struct fv_QueueInfo *qi = user_data;
+    struct fuse_session *se = qi->virtio_dev->se;
+    struct VuDev *dev = &qi->virtio_dev->dev;
+    FVRequest *req = data;
+    VuVirtqElement *elem = &req->elem;
+    struct fuse_buf fbuf = {};
+    bool allocated_bufv = false;
+    struct fuse_bufvec bufv;
+    struct fuse_bufvec *pbufv;
+
+    assert(se->bufsize > sizeof(struct fuse_in_header));
+
+    /*
+     * An element contains one request and the space to send our response
+     * They're spread over multiple descriptors in a scatter/gather set
+     * and we can't trust the guest to keep them still; so copy in/out.
+     */
+    fbuf.mem = malloc(se->bufsize); /* private copy of the request */
+    assert(fbuf.mem);
+
+    fuse_mutex_init(&req->ch.lock);
+    req->ch.fd = -1;
+    req->ch.qi = qi;
+
+    /* The 'out' part of the elem is from qemu */
+    unsigned int out_num = elem->out_num;
+    struct iovec *out_sg = elem->out_sg;
+    size_t out_len = iov_size(out_sg, out_num);
+    fuse_log(FUSE_LOG_DEBUG,
+             "%s: elem %d: with %d out desc of length %zd\n",
+             __func__, elem->index, out_num, out_len);
+
+    /*
+     * The elem should contain a 'fuse_in_header' (in to fuse)
+     * plus the data based on the len in the header.
+     */
+    if (out_len < sizeof(struct fuse_in_header)) {
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
+                 __func__, elem->index);
+        assert(0); /* TODO */
+    }
+    if (out_len > se->bufsize) {
+        fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__,
+                 elem->index);
+        assert(0); /* TODO */
+    }
+    /* Copy just the first element and look at it */
+    copy_from_iov(&fbuf, 1, out_sg);
+
+    pbufv = NULL; /* Compiler thinks an uninitialised path */
+    if (out_num > 2 &&
+        out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+        ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+        out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+        /*
+         * For a write we don't actually need to copy the
+         * data, we can just do it straight out of guest memory
+         * but we must still copy the headers in case the guest
+         * was nasty and changed them while we were using them.
+         */
+        fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
+
+        /* copy the fuse_write_in header after the fuse_in_header */
+        fbuf.mem += out_sg->iov_len; /* copy_from_iov() writes at buf->mem */
+        copy_from_iov(&fbuf, 1, out_sg + 1);
+        fbuf.mem -= out_sg->iov_len; /* back to the start of the buffer */
+        fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+        /* Allocate the bufv, with space for the rest of the iov */
+        pbufv = malloc(sizeof(struct fuse_bufvec) +
+                       sizeof(struct fuse_buf) * (out_num - 2));
+        if (!pbufv) {
+            fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+                    __func__);
+            goto out;
+        }
+
+        allocated_bufv = true;
+        pbufv->count = 1;
+        pbufv->buf[0] = fbuf; /* the two copied headers */
+
+        size_t iovindex, pbufvindex;
+        iovindex = 2; /* 2 headers, separate iovs */
+        pbufvindex = 1; /* 2 headers, 1 fusebuf */
+
+        for (; iovindex < out_num; iovindex++, pbufvindex++) {
+            pbufv->count++;
+            pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+            pbufv->buf[pbufvindex].flags = 0;
+            pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+            pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+        }
+    } else {
+        /* Normal (non fast write) path */
+
+        /* Copy the rest of the buffer */
+        fbuf.mem += out_sg->iov_len; /* append after the first element */
+        copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
+        fbuf.mem -= out_sg->iov_len;
+        fbuf.size = out_len;
+
+        /* TODO! Endianness of header */
+
+        /* TODO: Add checks for fuse_session_exited */
+        bufv.buf[0] = fbuf;
+        bufv.count = 1;
+        pbufv = &bufv;
+    }
+    pbufv->idx = 0;
+    pbufv->off = 0;
+    fuse_session_process_buf_int(se, pbufv, &req->ch);
+
+out:
+    if (allocated_bufv) {
+        free(pbufv);
+    }
+
+    /* If the request has no reply, still recycle the virtqueue element */
+    if (!req->reply_sent) {
+        struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
+
+        fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__,
+                 elem->index);
+
+        pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
+        pthread_mutex_lock(&qi->vq_lock);
+        vu_queue_push(dev, q, elem, 0);
+        vu_queue_notify(dev, q);
+        pthread_mutex_unlock(&qi->vq_lock);
+        pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
+    }
+
+    pthread_mutex_destroy(&req->ch.lock);
+    free(fbuf.mem);
+    free(req); /* the element popped in fv_queue_thread() */
+}
+
+/* Thread function for individual queues, created when a queue is 'started' */
+static void *fv_queue_thread(void *opaque)
+{
+    struct fv_QueueInfo *qi = opaque;
+    struct VuDev *dev = &qi->virtio_dev->dev;
+    struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
+    struct fuse_session *se = qi->virtio_dev->se;
+    GThreadPool *pool;
+
+    pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE,
+                             NULL);
+    if (!pool) {
+        fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
+        return NULL;
+    }
+
+    fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
+             qi->qidx, qi->kick_fd);
+    while (1) {
+        struct pollfd pf[2]; /* [0] = kick eventfd, [1] = kill eventfd */
+        int ret;
+
+        pf[0].fd = qi->kick_fd;
+        pf[0].events = POLLIN;
+        pf[0].revents = 0;
+        pf[1].fd = qi->kill_fd;
+        pf[1].events = POLLIN;
+        pf[1].revents = 0;
+
+        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
+                 qi->qidx);
+        int poll_res = ppoll(pf, 2, NULL, NULL);
+
+        if (poll_res == -1) {
+            if (errno == EINTR) {
+                fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
+                         __func__);
+                continue;
+            }
+            fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
+            break;
+        }
+        assert(poll_res >= 1);
+        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
+                     __func__, pf[0].revents, qi->qidx);
+            break;
+        }
+        if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+            fuse_log(FUSE_LOG_ERR,
+                     "%s: Unexpected poll revents %x Queue %d killfd\n",
+                     __func__, pf[1].revents, qi->qidx);
+            break;
+        }
+        if (pf[1].revents) { /* written by fv_queue_cleanup_thread() */
+            fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n",
+                     __func__, qi->qidx);
+            break;
+        }
+        assert(pf[0].revents & POLLIN);
+        fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
+                 qi->qidx);
+
+        eventfd_t evalue;
+        if (eventfd_read(qi->kick_fd, &evalue)) {
+            fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
+            break;
+        }
+        /* Mutual exclusion with virtio_loop() */
+        ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
+        assert(ret == 0); /* there is no possible error case */
+        pthread_mutex_lock(&qi->vq_lock);
+        /* out is from guest, in is to guest */
+        unsigned int in_bytes, out_bytes;
+        vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
+
+        fuse_log(FUSE_LOG_DEBUG,
+                 "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
+                 __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
+
+        while (1) { /* drain the queue, one pool task per element */
+            FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest));
+            if (!req) {
+                break;
+            }
+
+            req->reply_sent = false;
+
+            g_thread_pool_push(pool, req, NULL);
+        }
+
+        pthread_mutex_unlock(&qi->vq_lock);
+        pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
+    }
+
+    g_thread_pool_free(pool, FALSE, TRUE); /* immediate=FALSE, wait=TRUE */
+
+    return NULL;
+}
+
+static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
+{
+    struct fv_QueueInfo *qi;
+    int join_err;
+
+    assert(qidx < vud->nqueues);
+    qi = vud->qi[qidx];
+
+    /* Signal fv_queue_thread() via its kill eventfd and wait for it to end */
+    if (eventfd_write(qi->kill_fd, 1) != 0) {
+        fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n",
+                 qidx, strerror(errno));
+    }
+    join_err = pthread_join(qi->thread, NULL);
+    if (join_err != 0) {
+        fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n",
+                 __func__, qidx, join_err);
+    }
+    pthread_mutex_destroy(&qi->vq_lock);
+    close(qi->kill_fd);
+    qi->kick_fd = -1;
+    vud->qi[qidx] = NULL; /* slot can be reused by a later queue start */
+    free(qi);
+}
+
+/* Callback from libvhost-user on start or stop of a queue */
+static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
+{
+    struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
+    struct fv_QueueInfo *ourqi;
+
+    fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
+             started);
+    assert(qidx >= 0);
+
+    /*
+     * Ignore additional request queues for now.  passthrough_ll.c must be
+     * audited for thread-safety issues first.  It was written with a
+     * well-behaved client in mind and may not protect against all types of
+     * races yet.
+     */
+    if (qidx > 1) { /* only queue indexes 0 and 1 are accepted */
+        fuse_log(FUSE_LOG_ERR,
+                 "%s: multiple request queues not yet implemented, please only "
+                 "configure 1 request queue\n",
+                 __func__);
+        exit(EXIT_FAILURE);
+    }
+
+    if (started) {
+        /* Fire up a thread to watch this queue */
+        if (qidx >= vud->nqueues) {
+            vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0]));
+            assert(vud->qi);
+            memset(vud->qi + vud->nqueues, 0,
+                   sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
+            vud->nqueues = qidx + 1; /* new slots were zeroed above */
+        }
+        if (!vud->qi[qidx]) {
+            vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1);
+            assert(vud->qi[qidx]);
+            vud->qi[qidx]->virtio_dev = vud;
+            vud->qi[qidx]->qidx = qidx;
+        } else {
+            /* Shouldn't have been started */
+            assert(vud->qi[qidx]->kick_fd == -1);
+        }
+        ourqi = vud->qi[qidx];
+        ourqi->kick_fd = dev->vq[qidx].kick_fd;
+
+        ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
+        assert(ourqi->kill_fd != -1);
+        pthread_mutex_init(&ourqi->vq_lock, NULL);
+
+        if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
+            fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
+                     __func__, qidx);
+            assert(0);
+        }
+    } else {
+        fv_queue_cleanup_thread(vud, qidx); /* joins the queue thread */
+    }
+}
+
+static bool fv_queue_order(VuDev *dev, int qidx)
+{
+    return false; /* queue_is_processed_in_order: always answer "no" */
+}
+
+static const VuDevIface fv_iface = { /* callbacks handed to vu_init() */
+    .get_features = fv_get_features,
+    .set_features = fv_set_features,
+
+    /* Don't need process message, we've not got any at vhost-user level */
+    .queue_set_started = fv_queue_set_started,
+
+    .queue_is_processed_in_order = fv_queue_order,
+};
+
+/*
+ * Main loop; this mostly deals with events on the vhost-user socket itself,
+ * and not actual fuse data.  Stops any queue threads on exit; returns 0.
+ */
+int virtio_loop(struct fuse_session *se)
+{
+    fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
+
+    while (!fuse_session_exited(se)) {
+        struct pollfd pf[1];
+        bool ok;
+        int ret;
+        pf[0].fd = se->vu_socketfd;
+        pf[0].events = POLLIN;
+        pf[0].revents = 0;
+
+        fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);
+        int poll_res = ppoll(pf, 1, NULL, NULL);
+
+        if (poll_res == -1) {
+            if (errno == EINTR) { /* loop condition re-checks for exit */
+                fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
+                         __func__);
+                continue;
+            }
+            fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
+            break;
+        }
+        assert(poll_res == 1);
+        if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+            fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
+                     pf[0].revents);
+            break;
+        }
+        assert(pf[0].revents & POLLIN);
+        fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
+        /* Mutual exclusion with fv_queue_thread() */
+        ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock);
+        assert(ret == 0); /* there is no possible error case */
+
+        ok = vu_dispatch(&se->virtio_dev->dev);
+
+        pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock);
+
+        if (!ok) {
+            fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
+            break;
+        }
+    }
+
+    /*
+     * Make sure all fv_queue_thread()s quit on exit, as we're about to
+     * free virtio dev and fuse session, no one should access them anymore.
+     */
+    for (int i = 0; i < se->virtio_dev->nqueues; i++) {
+        if (!se->virtio_dev->qi[i]) { /* never started or already stopped */
+            continue;
+        }
+
+        fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i);
+        fv_queue_cleanup_thread(se->virtio_dev, i);
+    }
+
+    fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
+
+    return 0;
+}
+
+static void strreplace(char *s, char old, char new)
+{
+    /* Replace each 'old' byte in s with 'new'; the NUL is never matched */
+    char *hit = old ? strchr(s, old) : NULL;
+    for (; hit; hit = strchr(hit + 1, old)) {
+        *hit = new;
+    }
+}
+
+static bool fv_socket_lock(struct fuse_session *se)
+{
+    /*
+     * Write a pidfile named after the socket path; false (logged) on failure.
+     */
+    g_autofree gchar *dir = qemu_get_local_state_pathname("run/virtiofsd");
+    g_autofree gchar *sk_name = NULL;
+    g_autofree gchar *pidfile = NULL;
+    Error *local_err = NULL;
+
+    if (g_mkdir_with_parents(dir, S_IRWXU) < 0) {
+        fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s\n",
+                 __func__, dir, strerror(errno));
+        return false;
+    }
+
+    sk_name = g_strdup(se->vu_socket_path);
+    strreplace(sk_name, '/', '.'); /* flatten the path into one file name */
+    pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name);
+    if (!qemu_write_pidfile(pidfile, &local_err)) {
+        error_report_err(local_err);
+        return false;
+    }
+
+    return true;
+}
+
+static int fv_create_listen_socket(struct fuse_session *se)
+{
+    /*
+     * Bind and listen on se->vu_socket_path, storing the listening fd in
+     * se->vu_listen_fd.  Returns 0 on success and -1 on any failure.
+     */
+    struct sockaddr_un un = { .sun_family = AF_UNIX };
+    mode_t old_umask;
+
+    /* Nothing to do if fd is already initialized */
+    if (se->vu_listen_fd >= 0) {
+        return 0;
+    }
+
+    if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
+        fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
+        return -1;
+    }
+
+    if (!strlen(se->vu_socket_path)) {
+        fuse_log(FUSE_LOG_ERR, "Socket path is empty\n");
+        return -1;
+    }
+
+    /* Check whether the vu_socket_path is already used */
+    if (!fv_socket_lock(se)) {
+        return -1;
+    }
+
+    /* Create the Unix socket used to talk to qemu (cf. vhost-user-bridge) */
+    unlink(se->vu_socket_path);
+    strcpy(un.sun_path, se->vu_socket_path);
+
+    int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (listen_sock == -1) {
+        fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
+        return -1;
+    }
+
+    /* bind() can't set the socket's mode: use umask 077, restored below */
+    old_umask = umask(0077);
+    if (bind(listen_sock, (struct sockaddr *)&un, sizeof(un)) == -1) {
+        fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
+        umask(old_umask);
+        goto err_close;
+    }
+    umask(old_umask);
+
+    if (listen(listen_sock, 1) == -1) {
+        fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
+        goto err_close;
+    }
+
+    se->vu_listen_fd = listen_sock;
+    return 0;
+
+err_close:
+    close(listen_sock); /* bind/listen failed: don't leak the descriptor */
+    return -1;
+}
+
+int virtio_session_mount(struct fuse_session *se)
+{
+    /* Accept one vhost-user connection and set up se->virtio_dev for it */
+    int ret = fv_create_listen_socket(se);
+
+    if (ret < 0) {
+        return ret;
+    }
+    se->fd = -1;
+
+    fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
+             __func__);
+    int data_sock = accept(se->vu_listen_fd, NULL, NULL);
+    if (data_sock == -1) {
+        fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
+        close(se->vu_listen_fd);
+        se->vu_listen_fd = -1; /* never leave a closed fd in the session */
+        return -1;
+    }
+    close(se->vu_listen_fd);
+    se->vu_listen_fd = -1;
+    fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
+             __func__);
+
+    /* TODO: Some cleanup/deallocation! */
+    se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
+    if (!se->virtio_dev) {
+        fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__);
+        close(data_sock);
+        return -1;
+    }
+
+    se->vu_socketfd = data_sock;
+    se->virtio_dev->se = se;
+    pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
+    vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
+            fv_remove_watch, &fv_iface);
+
+    return 0;
+}
+
+void virtio_session_close(struct fuse_session *se)
+{
+    struct fv_VuDev *vud = se->virtio_dev;
+
+    close(se->vu_socketfd);
+    if (vud) {
+        /* Only the qi array itself is freed here, not its entries */
+        free(vud->qi);
+        pthread_rwlock_destroy(&vud->vu_dispatch_rwlock);
+        free(vud);
+        se->virtio_dev = NULL;
+    }
+}
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
new file mode 100644
index 0000000000..111684032c
--- /dev/null
+++ b/tools/virtiofsd/fuse_virtio.h
@@ -0,0 +1,33 @@
+/*
+ * virtio-fs glue for FUSE
+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Dave Gilbert  <dgilbert@redhat.com>
+ *
+ * Implements the glue between libfuse and libvhost-user
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ *  See the file COPYING.LIB
+ */
+
+#ifndef FUSE_VIRTIO_H
+#define FUSE_VIRTIO_H
+
+#include "fuse_i.h"
+
+struct fuse_session;
+
+int virtio_session_mount(struct fuse_session *se);
+void virtio_session_close(struct fuse_session *se);
+int virtio_loop(struct fuse_session *se);
+
+
+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
+                    struct iovec *iov, int count);
+
+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
+                         struct iovec *iov, int count,
+                         struct fuse_bufvec *buf, size_t len);
+
+#endif
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
new file mode 100644
index 0000000000..0801cf752c
--- /dev/null
+++ b/tools/virtiofsd/helper.c
@@ -0,0 +1,349 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * Helper functions to create (simple) standalone programs. With the
+ * aid of these functions it should be possible to create full FUSE
+ * file system by implementing nothing but the request handlers.
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+#include "qemu/osdep.h"
+#include "fuse_i.h"
+#include "fuse_lowlevel.h"
+#include "fuse_misc.h"
+#include "fuse_opt.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#define FUSE_HELPER_OPT(t, p)                       \
+    {                                               \
+        t, offsetof(struct fuse_cmdline_opts, p), 1 \
+    }
+#define FUSE_HELPER_OPT_VALUE(t, p, v)              \
+    {                                               \
+        t, offsetof(struct fuse_cmdline_opts, p), v \
+    }
+
+static const struct fuse_opt fuse_helper_opts[] = {
+    FUSE_HELPER_OPT("-h", show_help),
+    FUSE_HELPER_OPT("--help", show_help),
+    FUSE_HELPER_OPT("-V", show_version),
+    FUSE_HELPER_OPT("--version", show_version),
+    FUSE_HELPER_OPT("--print-capabilities", print_capabilities),
+    FUSE_HELPER_OPT("-d", debug),
+    FUSE_HELPER_OPT("debug", debug),
+    FUSE_HELPER_OPT("-d", foreground),
+    FUSE_HELPER_OPT("debug", foreground),
+    FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP),
+    FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP),
+    FUSE_HELPER_OPT("-f", foreground),
+    FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0),
+    FUSE_HELPER_OPT("fsname=", nodefault_subtype),
+    FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP),
+    FUSE_HELPER_OPT("subtype=", nodefault_subtype),
+    FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP),
+    FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads),
+    FUSE_HELPER_OPT("--syslog", syslog),
+    FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG),
+    FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO),
+    FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING),
+    FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR),
+    FUSE_OPT_END
+};
+
+struct fuse_conn_info_opts {
+    int atomic_o_trunc;
+    int no_remote_posix_lock;
+    int no_remote_flock;
+    int splice_write;
+    int splice_move;
+    int splice_read;
+    int no_splice_write;
+    int no_splice_move;
+    int no_splice_read;
+    int auto_inval_data;
+    int no_auto_inval_data;
+    int no_readdirplus;
+    int no_readdirplus_auto;
+    int async_dio;
+    int no_async_dio;
+    int writeback_cache;
+    int no_writeback_cache;
+    int async_read;
+    int sync_read;
+    unsigned max_write;
+    unsigned max_readahead;
+    unsigned max_background;
+    unsigned congestion_threshold;
+    unsigned time_gran;
+    int set_max_write;
+    int set_max_readahead;
+    int set_max_background;
+    int set_congestion_threshold;
+    int set_time_gran;
+};
+
+#define CONN_OPTION(t, p, v)                          \
+    {                                                 \
+        t, offsetof(struct fuse_conn_info_opts, p), v \
+    }
+static const struct fuse_opt conn_info_opt_spec[] = {
+    CONN_OPTION("max_write=%u", max_write, 0),
+    CONN_OPTION("max_write=", set_max_write, 1),
+    CONN_OPTION("max_readahead=%u", max_readahead, 0),
+    CONN_OPTION("max_readahead=", set_max_readahead, 1),
+    CONN_OPTION("max_background=%u", max_background, 0),
+    CONN_OPTION("max_background=", set_max_background, 1),
+    CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0),
+    CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1),
+    CONN_OPTION("sync_read", sync_read, 1),
+    CONN_OPTION("async_read", async_read, 1),
+    CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1),
+    CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1),
+    CONN_OPTION("no_remote_lock", no_remote_flock, 1),
+    CONN_OPTION("no_remote_flock", no_remote_flock, 1),
+    CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1),
+    CONN_OPTION("splice_write", splice_write, 1),
+    CONN_OPTION("no_splice_write", no_splice_write, 1),
+    CONN_OPTION("splice_move", splice_move, 1),
+    CONN_OPTION("no_splice_move", no_splice_move, 1),
+    CONN_OPTION("splice_read", splice_read, 1),
+    CONN_OPTION("no_splice_read", no_splice_read, 1),
+    CONN_OPTION("auto_inval_data", auto_inval_data, 1),
+    CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1),
+    CONN_OPTION("readdirplus=no", no_readdirplus, 1),
+    CONN_OPTION("readdirplus=yes", no_readdirplus, 0),
+    CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1),
+    CONN_OPTION("readdirplus=auto", no_readdirplus, 0),
+    CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0),
+    CONN_OPTION("async_dio", async_dio, 1),
+    CONN_OPTION("no_async_dio", no_async_dio, 1),
+    CONN_OPTION("writeback_cache", writeback_cache, 1),
+    CONN_OPTION("no_writeback_cache", no_writeback_cache, 1),
+    CONN_OPTION("time_gran=%u", time_gran, 0),
+    CONN_OPTION("time_gran=", set_time_gran, 1),
+    FUSE_OPT_END
+};
+
+
+/* Print the list of supported command-line options to stdout. */
+void fuse_cmdline_help(void)
+{
+    printf("    -h   --help                print help\n"
+           "    -V   --version             print version\n"
+           "    --print-capabilities       print vhost-user.json\n"
+           "    -d   -o debug              enable debug output (implies -f)\n"
+           "    --syslog                   log to syslog (default stderr)\n"
+           "    -f                         foreground operation\n"
+           "    --daemonize                run in background\n"
+           "    -o cache=<mode>            cache mode. could be one of \"auto, "
+           "always, none\"\n"
+           "                               default: auto\n"
+           "    -o flock|no_flock          enable/disable flock\n"
+           "                               default: no_flock\n"
+           "    -o log_level=<level>       log level, default to \"info\"\n"
+           "                               level could be one of \"debug, "
+           "info, warn, err\"\n"
+           "    -o max_idle_threads        the maximum number of idle worker "
+           "threads\n"
+           "                               allowed (default: 10)\n"
+           "    -o norace                  disable racy fallback\n"
+           "                               default: false\n"
+           "    -o posix_lock|no_posix_lock\n"
+           "                               enable/disable remote posix lock\n"
+           "                               default: posix_lock\n"
+           "    -o readdirplus|no_readdirplus\n"
+           "                               enable/disable readdirplus\n"
+           "                               default: readdirplus except with "
+           "cache=none\n"
+           "    -o timeout=<number>        I/O timeout (second)\n"
+           "                               default: depends on cache= option.\n"
+           "    -o writeback|no_writeback  enable/disable writeback cache\n"
+           "                               default: no_writeback\n"
+           "    -o xattr|no_xattr          enable/disable xattr\n"
+           "                               default: no_xattr\n");
+}
+
+static int fuse_helper_opt_proc(void *data, const char *arg, int key,
+                                struct fuse_args *outargs)
+{
+    (void)data;
+    (void)outargs;
+
+    switch (key) {
+    case FUSE_OPT_KEY_NONOPT:
+        fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg);
+        return -1;
+
+    default:
+        /* Pass through unknown options */
+        return 1;
+    }
+}
+
+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts)
+{
+    memset(opts, 0, sizeof(struct fuse_cmdline_opts));
+
+    opts->max_idle_threads = 10;
+    opts->foreground = 1;
+
+    if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) ==
+        -1) {
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/* chdir to "/" and, unless @foreground, daemonize.  Returns 0 or -1. */
+int fuse_daemonize(int foreground)
+{
+    int ret;
+
+    if (!foreground) {
+        int nullfd, fd;
+        int waiter[2];
+        char completed;
+
+        if (pipe(waiter)) {
+            fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n",
+                     strerror(errno));
+            return -1;
+        }
+
+        /*
+         * Daemonize by forking: the parent exits once the child reports
+         * that it is initialized, leaving the child reparented to init.
+         */
+        switch (fork()) {
+        case -1:
+            fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n",
+                     strerror(errno));
+            close(waiter[0]);
+            close(waiter[1]);
+            return -1;
+        case 0:
+            break;
+        default:
+            /* Close our write end so a dead child means EOF, not a hang */
+            close(waiter[1]);
+            _exit(read(waiter[0], &completed, sizeof(completed)) !=
+                  sizeof(completed));
+        }
+
+        if (setsid() == -1) {
+            fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n",
+                     strerror(errno));
+            return -1;
+        }
+
+        ret = chdir("/");
+
+        nullfd = open("/dev/null", O_RDWR, 0);
+        if (nullfd != -1) {
+            /* dup2() returns the new fd on success; only -1 is an error */
+            for (fd = 0; fd <= 2; fd++) {
+                if (dup2(nullfd, fd) == -1) {
+                    ret = -1;
+                }
+            }
+            if (nullfd > 2) {
+                close(nullfd);
+            }
+        }
+
+        /* Propagate completion of daemon initialization */
+        completed = 1;
+        if (write(waiter[1], &completed, sizeof(completed)) !=
+            sizeof(completed)) {
+            ret = -1;
+        }
+        close(waiter[0]);
+        close(waiter[1]);
+    } else {
+        ret = chdir("/");
+    }
+    return ret;
+}
+
+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
+                               struct fuse_conn_info *conn)
+{
+    if (opts->set_max_write) {
+        conn->max_write = opts->max_write;
+    }
+    if (opts->set_max_background) {
+        conn->max_background = opts->max_background;
+    }
+    if (opts->set_congestion_threshold) {
+        conn->congestion_threshold = opts->congestion_threshold;
+    }
+    if (opts->set_time_gran) {
+        conn->time_gran = opts->time_gran;
+    }
+    if (opts->set_max_readahead) {
+        conn->max_readahead = opts->max_readahead;
+    }
+
+#define LL_ENABLE(cond, cap) \
+    if (cond)                \
+        conn->want |= (cap)
+#define LL_DISABLE(cond, cap) \
+    if (cond)                 \
+        conn->want &= ~(cap)
+
+    LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ);
+    LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ);
+
+    LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE);
+    LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE);
+
+    LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE);
+    LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE);
+
+    LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
+    LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA);
+
+    LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS);
+    LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO);
+
+    LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO);
+    LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO);
+
+    LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
+    LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE);
+
+    LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ);
+    LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ);
+
+    LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS);
+    LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS);
+}
+
+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args)
+{
+    struct fuse_conn_info_opts *opts;
+
+    opts = calloc(1, sizeof(struct fuse_conn_info_opts));
+    if (opts == NULL) {
+        fuse_log(FUSE_LOG_ERR, "calloc failed\n");
+        return NULL;
+    }
+    if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) {
+        free(opts);
+        return NULL;
+    }
+    return opts;
+}
diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h
new file mode 100644
index 0000000000..0b98275ed5
--- /dev/null
+++ b/tools/virtiofsd/passthrough_helpers.h
@@ -0,0 +1,51 @@
+/*
+ * FUSE: Filesystem in Userspace
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE
+ */
+
+/*
+ * Creates files on the underlying file system in response to a FUSE_MKNOD
+ * operation
+ */
+static int mknod_wrapper(int dirfd, const char *path, const char *link,
+                         int mode, dev_t rdev)
+{
+    int res;
+
+    if (S_ISREG(mode)) {
+        res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode);
+        if (res >= 0) {
+            res = close(res);
+        }
+    } else if (S_ISDIR(mode)) {
+        res = mkdirat(dirfd, path, mode);
+    } else if (S_ISLNK(mode) && link != NULL) {
+        res = symlinkat(link, dirfd, path);
+    } else if (S_ISFIFO(mode)) {
+        res = mkfifoat(dirfd, path, mode);
+    } else {
+        res = mknodat(dirfd, path, mode, rdev);
+    }
+
+    return res;
+}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
new file mode 100644
index 0000000000..e6f2399efc
--- /dev/null
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -0,0 +1,3006 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU GPLv2.
+ * See the file COPYING.
+ */
+
+/*
+ *
+ * This file system mirrors the existing file system hierarchy of the
+ * system, starting at the root file system. This is implemented by
+ * just "passing through" all requests to the corresponding user-space
+ * libc functions. In contrast to passthrough.c and passthrough_fh.c,
+ * this implementation uses the low-level API. Its performance should
+ * be the least bad among the three, but many operations are not
+ * implemented. In particular, it is not possible to remove files (or
+ * directories) because the code necessary to defer actual removal
+ * until the file is not opened anymore would make the example much
+ * more complicated.
+ *
+ * When writeback caching is enabled (-o writeback mount option), it
+ * is only possible to write to files for which the mounting user has
+ * read permissions. This is because the writeback cache requires the
+ * kernel to be able to issue read requests for all files (which the
+ * passthrough filesystem cannot satisfy if it can't read the file in
+ * the underlying filesystem).
+ *
+ * Compile with:
+ *
+ *     gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
+ * passthrough_ll
+ *
+ * ## Source code ##
+ * \include passthrough_ll.c
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "fuse_virtio.h"
+#include "fuse_log.h"
+#include "fuse_lowlevel.h"
+#include <assert.h>
+#include <cap-ng.h>
+#include <dirent.h>
+#include <errno.h>
+#include <glib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/xattr.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "passthrough_helpers.h"
+#include "seccomp.h"
+
+/* Keep track of inode posix locks for each owner. */
+struct lo_inode_plock {
+    uint64_t lock_owner;
+    int fd; /* fd for OFD locks */
+};
+
+struct lo_map_elem {
+    union {
+        struct lo_inode *inode;
+        struct lo_dirp *dirp;
+        int fd;
+        ssize_t freelist;
+    };
+    bool in_use;
+};
+
+/* Maps FUSE fh or ino values to internal objects */
+struct lo_map {
+    struct lo_map_elem *elems;
+    size_t nelems;
+    ssize_t freelist;
+};
+
+struct lo_key {
+    ino_t ino;
+    dev_t dev;
+};
+
+struct lo_inode {
+    int fd; /* closed by lo_inode_put() when the last reference is dropped */
+
+    /*
+     * Atomic reference count for this object.  The nlookup field holds a
+     * reference and releases it when nlookup reaches 0.
+     */
+    gint refcount;
+
+    struct lo_key key; /* (st_ino, st_dev) pair identifying the inode */
+
+    /*
+     * This counter keeps the inode alive during the FUSE session.
+     * Incremented when the FUSE inode number is sent in a reply
+     * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc).  Decremented when an inode is
+     * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc.
+     *
+     * Note that this value is untrusted because the client can manipulate
+     * it arbitrarily using FUSE_FORGET requests.
+     *
+     * Protected by lo->mutex.
+     */
+    uint64_t nlookup;
+
+    fuse_ino_t fuse_ino;
+    pthread_mutex_t plock_mutex;
+    GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
+
+    bool is_symlink;
+};
+
+struct lo_cred {
+    uid_t euid;
+    gid_t egid;
+};
+
+enum {
+    CACHE_NONE,
+    CACHE_AUTO,
+    CACHE_ALWAYS,
+};
+
+struct lo_data {
+    pthread_mutex_t mutex;
+    int debug;
+    int norace;
+    int writeback;
+    int flock;
+    int posix_lock;
+    int xattr;
+    char *source;
+    double timeout;
+    int cache;
+    int timeout_set;
+    int readdirplus_set;
+    int readdirplus_clear;
+    struct lo_inode root;
+    GHashTable *inodes; /* protected by lo->mutex */
+    struct lo_map ino_map; /* protected by lo->mutex */
+    struct lo_map dirp_map; /* protected by lo->mutex */
+    struct lo_map fd_map; /* protected by lo->mutex */
+
+    /* An O_PATH file descriptor to /proc/self/fd/ */
+    int proc_self_fd;
+};
+
+static const struct fuse_opt lo_opts[] = {
+    { "writeback", offsetof(struct lo_data, writeback), 1 },
+    { "no_writeback", offsetof(struct lo_data, writeback), 0 },
+    { "source=%s", offsetof(struct lo_data, source), 0 },
+    { "flock", offsetof(struct lo_data, flock), 1 },
+    { "no_flock", offsetof(struct lo_data, flock), 0 },
+    { "posix_lock", offsetof(struct lo_data, posix_lock), 1 },
+    { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
+    { "xattr", offsetof(struct lo_data, xattr), 1 },
+    { "no_xattr", offsetof(struct lo_data, xattr), 0 },
+    { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
+    { "timeout=", offsetof(struct lo_data, timeout_set), 1 },
+    { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
+    { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
+    { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
+    { "norace", offsetof(struct lo_data, norace), 1 },
+    { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
+    { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
+    FUSE_OPT_END
+};
+static bool use_syslog = false;
+static int current_log_level;
+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
+                                 uint64_t n);
+
+static struct {
+    pthread_mutex_t mutex;
+    void *saved;
+} cap;
+/* Set once the current thread has loaded its cap-ng state from cap.saved */
+static __thread bool cap_loaded = 0;
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
+
+static int is_dot_or_dotdot(const char *name)
+{
+    return name[0] == '.' &&
+           (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'));
+}
+
+/* Is `path` a single path component that is not "." or ".."? */
+static int is_safe_path_component(const char *path)
+{
+    if (strchr(path, '/')) {
+        return 0;
+    }
+
+    return !is_dot_or_dotdot(path);
+}
+
+static struct lo_data *lo_data(fuse_req_t req)
+{
+    return (struct lo_data *)fuse_req_userdata(req);
+}
+
+/*
+ * Load capng's state from our saved state if the current thread
+ * hadn't previously been loaded.
+ * Returns 0 on success, or -EINVAL if the saved state could not be
+ * re-created.
+ */
+static int load_capng(void)
+{
+    if (!cap_loaded) {
+        pthread_mutex_lock(&cap.mutex);
+        capng_restore_state(&cap.saved);
+        /* restore_state frees the saved copy, so make another. */
+        cap.saved = capng_save_state();
+        if (!cap.saved) {
+            /* Never return with cap.mutex still held */
+            pthread_mutex_unlock(&cap.mutex);
+            fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n");
+            return -EINVAL;
+        }
+        pthread_mutex_unlock(&cap.mutex);
+
+        /*
+         * We want to use the loaded state for our pid,
+         * not the original
+         */
+        capng_setpid(syscall(SYS_gettid));
+        cap_loaded = true;
+    }
+    return 0;
+}
+
+/*
+ * Helpers for dropping and regaining effective capabilities. Return 0 on
+ * success, an errno value otherwise; *cap_dropped is set only on a real drop.
+ */
+static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
+{
+    int cap, ret;
+
+    cap = capng_name_to_capability(cap_name);
+    if (cap < 0) {
+        ret = errno;
+        fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
+                 cap_name, strerror(errno));
+        goto out;
+    }
+
+    ret = -load_capng(); /* 0 or -EINVAL; errno is not set by it */
+    if (ret) {
+        fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
+        goto out;
+    }
+
+    /* Nothing to do if the capability is not in the effective set. */
+    if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) {
+        ret = 0;
+        goto out;
+    }
+
+    if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) {
+        ret = errno;
+        fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n");
+        goto out;
+    }
+
+    if (capng_apply(CAPNG_SELECT_CAPS)) {
+        ret = errno;
+        fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n");
+        goto out;
+    }
+
+    ret = 0;
+    if (cap_dropped) {
+        *cap_dropped = true;
+    }
+
+out:
+    return ret;
+}
+
+/* Regain @cap_name in the effective set; 0 on success, errno otherwise. */
+static int gain_effective_cap(const char *cap_name)
+{
+    int cap, ret;
+
+    cap = capng_name_to_capability(cap_name);
+    if (cap < 0) {
+        ret = errno;
+        fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
+                 cap_name, strerror(errno));
+        goto out;
+    }
+
+    ret = -load_capng(); /* 0 or -EINVAL; errno is not set by it */
+    if (ret) {
+        fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
+        goto out;
+    }
+
+    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) {
+        ret = errno;
+        fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n");
+        goto out;
+    }
+
+    if (capng_apply(CAPNG_SELECT_CAPS)) {
+        ret = errno;
+        fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n");
+        goto out;
+    }
+    ret = 0;
+
+out:
+    return ret;
+}
+
+static void lo_map_init(struct lo_map *map)
+{
+    map->elems = NULL;
+    map->nelems = 0;
+    map->freelist = -1;
+}
+
+static void lo_map_destroy(struct lo_map *map)
+{
+    free(map->elems);
+}
+
+static int lo_map_grow(struct lo_map *map, size_t new_nelems)
+{
+    struct lo_map_elem *new_elems;
+    size_t i;
+
+    if (new_nelems <= map->nelems) {
+        return 1;
+    }
+
+    new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems);
+    if (!new_elems) {
+        return 0;
+    }
+
+    for (i = map->nelems; i < new_nelems; i++) {
+        new_elems[i].freelist = i + 1;
+        new_elems[i].in_use = false;
+    }
+    new_elems[new_nelems - 1].freelist = -1;
+
+    map->elems = new_elems;
+    map->freelist = map->nelems;
+    map->nelems = new_nelems;
+    return 1;
+}
+
+static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map)
+{
+    struct lo_map_elem *elem;
+
+    if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) {
+        return NULL;
+    }
+
+    elem = &map->elems[map->freelist];
+    map->freelist = elem->freelist;
+
+    elem->in_use = true;
+
+    return elem;
+}
+
+static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key)
+{
+    ssize_t *prev;
+
+    if (!lo_map_grow(map, key + 1)) {
+        return NULL;
+    }
+
+    for (prev = &map->freelist; *prev != -1;
+         prev = &map->elems[*prev].freelist) {
+        if (*prev == key) {
+            struct lo_map_elem *elem = &map->elems[key];
+
+            *prev = elem->freelist;
+            elem->in_use = true;
+            return elem;
+        }
+    }
+    return NULL;
+}
+
+static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key)
+{
+    if (key >= map->nelems) {
+        return NULL;
+    }
+    if (!map->elems[key].in_use) {
+        return NULL;
+    }
+    return &map->elems[key];
+}
+
+static void lo_map_remove(struct lo_map *map, size_t key)
+{
+    struct lo_map_elem *elem;
+
+    if (key >= map->nelems) {
+        return;
+    }
+
+    elem = &map->elems[key];
+    if (!elem->in_use) {
+        return;
+    }
+
+    elem->in_use = false;
+
+    elem->freelist = map->freelist;
+    map->freelist = key;
+}
+
+/* Assumes lo->mutex is held */
+static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
+{
+    struct lo_map_elem *elem;
+
+    elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
+    if (!elem) {
+        return -1;
+    }
+
+    elem->fd = fd;
+    return elem - lo_data(req)->fd_map.elems;
+}
+
+/* Assumes lo->mutex is held */
+static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
+{
+    struct lo_map_elem *elem;
+
+    elem = lo_map_alloc_elem(&lo_data(req)->dirp_map);
+    if (!elem) {
+        return -1;
+    }
+
+    elem->dirp = dirp;
+    return elem - lo_data(req)->dirp_map.elems;
+}
+
+/* Assumes lo->mutex is held */
+static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
+{
+    struct lo_map_elem *elem;
+
+    elem = lo_map_alloc_elem(&lo_data(req)->ino_map);
+    if (!elem) {
+        return -1;
+    }
+
+    elem->inode = inode;
+    return elem - lo_data(req)->ino_map.elems;
+}
+
+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
+{
+    struct lo_inode *inode = *inodep;
+
+    if (!inode) {
+        return;
+    }
+
+    *inodep = NULL;
+
+    if (g_atomic_int_dec_and_test(&inode->refcount)) {
+        close(inode->fd);
+        free(inode);
+    }
+}
+
+/* Caller must release refcount using lo_inode_put() */
+static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_map_elem *elem;
+
+    pthread_mutex_lock(&lo->mutex);
+    elem = lo_map_get(&lo->ino_map, ino);
+    if (elem) {
+        g_atomic_int_inc(&elem->inode->refcount);
+    }
+    pthread_mutex_unlock(&lo->mutex);
+
+    if (!elem) {
+        return NULL;
+    }
+
+    return elem->inode;
+}
+
+/*
+ * TODO Remove this helper and force callers to hold an inode refcount until
+ * they are done with the fd.  This will be done in a later patch to make
+ * review easier.
+ */
+static int lo_fd(fuse_req_t req, fuse_ino_t ino)
+{
+    struct lo_inode *inode = lo_inode(req, ino);
+    int fd;
+
+    if (!inode) {
+        return -1;
+    }
+
+    fd = inode->fd;
+    lo_inode_put(lo_data(req), &inode);
+    return fd;
+}
+
+static void lo_init(void *userdata, struct fuse_conn_info *conn)
+{
+    struct lo_data *lo = (struct lo_data *)userdata;
+
+    if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) {
+        conn->want |= FUSE_CAP_EXPORT_SUPPORT;
+    }
+
+    if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
+        fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
+        conn->want |= FUSE_CAP_WRITEBACK_CACHE;
+    }
+    if (conn->capable & FUSE_CAP_FLOCK_LOCKS) {
+        if (lo->flock) {
+            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
+            conn->want |= FUSE_CAP_FLOCK_LOCKS;
+        } else {
+            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n");
+            conn->want &= ~FUSE_CAP_FLOCK_LOCKS;
+        }
+    }
+
+    if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
+        if (lo->posix_lock) {
+            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n");
+            conn->want |= FUSE_CAP_POSIX_LOCKS;
+        } else {
+            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n");
+            conn->want &= ~FUSE_CAP_POSIX_LOCKS;
+        }
+    }
+
+    if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
+        lo->readdirplus_clear) {
+        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
+        conn->want &= ~FUSE_CAP_READDIRPLUS;
+    }
+}
+
+static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
+                       struct fuse_file_info *fi)
+{
+    int res;
+    struct stat buf;
+    struct lo_data *lo = lo_data(req);
+
+    (void)fi;
+
+    res =
+        fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        return (void)fuse_reply_err(req, errno);
+    }
+
+    fuse_reply_attr(req, &buf, lo->timeout);
+}
+
+/*
+ * Increments parent->nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
+                              char path[PATH_MAX], struct lo_inode **parent)
+{
+    char procname[64];
+    char *last;
+    struct stat stat;
+    struct lo_inode *p;
+    int retries = 2;
+    int res;
+
+retry:
+    sprintf(procname, "%i", inode->fd);
+
+    res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX);
+    if (res < 0) {
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
+        goto fail_noretry;
+    }
+
+    if (res >= PATH_MAX) {
+        fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
+        goto fail_noretry;
+    }
+    path[res] = '\0';
+
+    last = strrchr(path, '/');
+    if (last == NULL) {
+        /* Shouldn't happen */
+        fuse_log(
+            FUSE_LOG_WARNING,
+            "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
+        goto fail_noretry;
+    }
+    if (last == path) {
+        p = &lo->root;
+        pthread_mutex_lock(&lo->mutex);
+        p->nlookup++;
+        g_atomic_int_inc(&p->refcount);
+        pthread_mutex_unlock(&lo->mutex);
+    } else {
+        *last = '\0';
+        res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
+        if (res == -1) {
+            if (!retries) {
+                fuse_log(FUSE_LOG_WARNING,
+                         "%s: failed to stat parent: %m\n", __func__);
+            }
+            goto fail;
+        }
+        p = lo_find(lo, &stat);
+        if (p == NULL) {
+            if (!retries) {
+                fuse_log(FUSE_LOG_WARNING,
+                         "%s: failed to find parent\n", __func__);
+            }
+            goto fail;
+        }
+    }
+    last++;
+    res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        if (!retries) {
+            fuse_log(FUSE_LOG_WARNING,
+                     "%s: failed to stat last\n", __func__);
+        }
+        goto fail_unref;
+    }
+    if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) {
+        if (!retries) {
+            fuse_log(FUSE_LOG_WARNING,
+                     "%s: failed to match last\n", __func__);
+        }
+        goto fail_unref;
+    }
+    *parent = p;
+    memmove(path, last, strlen(last) + 1);
+
+    return 0;
+
+fail_unref:
+    unref_inode_lolocked(lo, p, 1);
+    lo_inode_put(lo, &p);
+fail:
+    if (retries) {
+        retries--;
+        goto retry;
+    }
+fail_noretry:
+    errno = EIO;
+    return -1;
+}
+
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
+                           const struct timespec *tv)
+{
+    struct lo_inode *dir;
+    char name[PATH_MAX];
+    int ret;
+
+    if (!inode->is_symlink) {
+        /* Name the inode by fd number relative to lo->proc_self_fd */
+        sprintf(name, "%i", inode->fd);
+        return utimensat(lo->proc_self_fd, name, tv, 0);
+    }
+
+    ret = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
+    if (ret != -1 || errno != EINVAL) {
+        return ret;
+    }
+    /* Sorry, no race free way to set times on symlink. */
+    if (lo->norace) {
+        errno = EPERM;
+        return ret;
+    }
+
+    /* Racy fallback: set the times by name relative to the parent directory */
+    ret = lo_parent_and_name(lo, inode, name, &dir);
+    if (ret != -1) {
+        ret = utimensat(dir->fd, name, tv, AT_SYMLINK_NOFOLLOW);
+        unref_inode_lolocked(lo, dir, 1);
+        lo_inode_put(lo, &dir);
+    }
+
+    return ret;
+}
+
+static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_map_elem *elem;
+    int fd = -1; /* returned when fi->fh is not in fd_map */
+
+    pthread_mutex_lock(&lo->mutex);
+    elem = lo_map_get(&lo->fd_map, fi->fh);
+    if (elem) {
+        fd = elem->fd; /* copy while the map is protected by lo->mutex */
+    }
+    pthread_mutex_unlock(&lo->mutex);
+
+    return fd;
+}
+
+static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+                       int valid, struct fuse_file_info *fi)
+{
+    int saverr;
+    char procname[64];
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode;
+    int ifd;
+    int res;
+    int fd;
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    ifd = inode->fd;
+
+    /* If fi->fh is invalid we'll report EBADF later */
+    if (fi) {
+        fd = lo_fi_fd(req, fi);
+    }
+
+    if (valid & FUSE_SET_ATTR_MODE) {
+        if (fi) {
+            res = fchmod(fd, attr->st_mode);
+        } else {
+            sprintf(procname, "%i", ifd);
+            res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
+        }
+        if (res == -1) {
+            goto out_err;
+        }
+    }
+    if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
+        uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
+        gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;
+
+        res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+        if (res == -1) {
+            goto out_err;
+        }
+    }
+    if (valid & FUSE_SET_ATTR_SIZE) {
+        int truncfd;
+
+        if (fi) {
+            truncfd = fd;
+        } else {
+            sprintf(procname, "%i", ifd);
+            truncfd = openat(lo->proc_self_fd, procname, O_RDWR);
+            if (truncfd < 0) {
+                goto out_err;
+            }
+        }
+
+        res = ftruncate(truncfd, attr->st_size);
+        if (!fi) {
+            saverr = errno;
+            close(truncfd);
+            errno = saverr;
+        }
+        if (res == -1) {
+            goto out_err;
+        }
+    }
+    if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
+        struct timespec tv[2];
+
+        tv[0].tv_sec = 0;
+        tv[1].tv_sec = 0;
+        tv[0].tv_nsec = UTIME_OMIT;
+        tv[1].tv_nsec = UTIME_OMIT;
+
+        if (valid & FUSE_SET_ATTR_ATIME_NOW) {
+            tv[0].tv_nsec = UTIME_NOW;
+        } else if (valid & FUSE_SET_ATTR_ATIME) {
+            tv[0] = attr->st_atim;
+        }
+
+        if (valid & FUSE_SET_ATTR_MTIME_NOW) {
+            tv[1].tv_nsec = UTIME_NOW;
+        } else if (valid & FUSE_SET_ATTR_MTIME) {
+            tv[1] = attr->st_mtim;
+        }
+
+        if (fi) {
+            res = futimens(fd, tv);
+        } else {
+            res = utimensat_empty(lo, inode, tv);
+        }
+        if (res == -1) {
+            goto out_err;
+        }
+    }
+    lo_inode_put(lo, &inode);
+
+    return lo_getattr(req, ino, fi);
+
+out_err:
+    saverr = errno;
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, saverr);
+}
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
+{
+    struct lo_key lookup_key = { .ino = st->st_ino, .dev = st->st_dev };
+    struct lo_inode *found;
+
+    /* Look up and take both references in one lo->mutex critical section */
+    pthread_mutex_lock(&lo->mutex);
+    found = g_hash_table_lookup(lo->inodes, &lookup_key);
+    if (found != NULL) {
+        /* An inode found in the table must have a non-zero lookup count */
+        assert(found->nlookup > 0);
+        found->nlookup += 1;
+        g_atomic_int_inc(&found->refcount);
+    }
+    pthread_mutex_unlock(&lo->mutex);
+
+    /* NULL when no inode with this (ino, dev) pair is in the table */
+    return found;
+}
+
+/* value_destroy_func for posix_locks GHashTable */
+static void posix_locks_value_destroy(gpointer data)
+{
+    struct lo_inode_plock *plock = data;
+
+    /*
+     * We had used open() for locks and had only one fd. So
+     * closing this fd should release all OFD locks.
+     */
+    close(plock->fd);
+    free(plock);
+}
+
+/*
+ * Returns 0 or an errno value. On success the inode's nlookup has been
+ * incremented; the temporary refcounts taken here are dropped before return.
+ */
+static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
+                        struct fuse_entry_param *e)
+{
+    int newfd;
+    int res;
+    int saverr;
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode = NULL;
+    struct lo_inode *dir = lo_inode(req, parent);
+
+    /*
+     * name_to_handle_at() and open_by_handle_at() can reach here with fuse
+     * mount point in guest, but we don't have its inode info in the
+     * ino_map.
+     */
+    if (!dir) {
+        return ENOENT;
+    }
+
+    memset(e, 0, sizeof(*e));
+    e->attr_timeout = lo->timeout;
+    e->entry_timeout = lo->timeout;
+
+    /* Do not allow escaping root directory */
+    if (dir == &lo->root && strcmp(name, "..") == 0) {
+        name = ".";
+    }
+
+    newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
+    if (newfd == -1) {
+        goto out_err;
+    }
+
+    res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        goto out_err;
+    }
+
+    inode = lo_find(lo, &e->attr);
+    if (inode) {
+        close(newfd);
+        newfd = -1;
+    } else {
+        inode = calloc(1, sizeof(struct lo_inode));
+        if (!inode) {
+            goto out_err;
+        }
+
+        inode->is_symlink = S_ISLNK(e->attr.st_mode);
+
+        /*
+         * One for the caller and one for nlookup (released in
+         * unref_inode_lolocked())
+         */
+        g_atomic_int_set(&inode->refcount, 2);
+
+        inode->nlookup = 1;
+        inode->fd = newfd;
+        newfd = -1;
+        inode->key.ino = e->attr.st_ino;
+        inode->key.dev = e->attr.st_dev;
+        pthread_mutex_init(&inode->plock_mutex, NULL);
+        inode->posix_locks = g_hash_table_new_full(
+            g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
+
+        pthread_mutex_lock(&lo->mutex);
+        inode->fuse_ino = lo_add_inode_mapping(req, inode);
+        g_hash_table_insert(lo->inodes, &inode->key, inode);
+        pthread_mutex_unlock(&lo->mutex);
+    }
+    e->ino = inode->fuse_ino;
+    lo_inode_put(lo, &inode);
+    lo_inode_put(lo, &dir);
+
+    fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
+             name, (unsigned long long)e->ino);
+
+    return 0;
+
+out_err:
+    saverr = errno;
+    if (newfd != -1) {
+        close(newfd);
+    }
+    lo_inode_put(lo, &inode);
+    lo_inode_put(lo, &dir);
+    return saverr;
+}
+
+static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+    struct fuse_entry_param entry;
+    int ret;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent,
+             name);
+
+    /*
+     * Don't use is_safe_path_component(), allow "." and ".." for NFS export
+     * support.
+     */
+    if (strchr(name, '/') != NULL) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    ret = lo_do_lookup(req, parent, name, &entry);
+    if (ret == 0) {
+        fuse_reply_entry(req, &entry);
+        return;
+    }
+    fuse_reply_err(req, ret);
+}
+
+/*
+ * On some archs, setres*id is limited to 2^16 but they
+ * provide setres*id32 variants that allow 2^32.
+ * Others just let setres*id do 2^32 anyway.
+ */
+#ifdef SYS_setresgid32
+#define OURSYS_setresgid SYS_setresgid32
+#else
+#define OURSYS_setresgid SYS_setresgid
+#endif
+
+#ifdef SYS_setresuid32
+#define OURSYS_setresuid SYS_setresuid32
+#else
+#define OURSYS_setresuid SYS_setresuid
+#endif
+
+/*
+ * Change to uid/gid of caller so that file is created with
+ * ownership of caller.
+ * TODO: What about selinux context?
+ */
+static int lo_change_cred(fuse_req_t req, struct lo_cred *old)
+{
+    int res;
+
+    old->euid = geteuid();
+    old->egid = getegid();
+
+    res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1);
+    if (res == -1) {
+        return errno;
+    }
+
+    res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1);
+    if (res == -1) {
+        int errno_save = errno;
+
+        syscall(OURSYS_setresgid, -1, old->egid, -1);
+        return errno_save;
+    }
+
+    return 0;
+}
+
+/* Regain Privileges */
+static void lo_restore_cred(struct lo_cred *old)
+{
+    int res;
+
+    res = syscall(OURSYS_setresuid, -1, old->euid, -1);
+    if (res == -1) {
+        fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid);
+        exit(1);
+    }
+
+    res = syscall(OURSYS_setresgid, -1, old->egid, -1);
+    if (res == -1) {
+        fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid);
+        exit(1);
+    }
+}
+
+/* Shared by mknod/mkdir/symlink: create as the caller, then look it up. */
+static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
+                             const char *name, mode_t mode, dev_t rdev,
+                             const char *link)
+{
+    int res;
+    int saverr;
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *dir;
+    struct fuse_entry_param e;
+    struct lo_cred old = {};
+
+    if (!is_safe_path_component(name)) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    dir = lo_inode(req, parent);
+    if (!dir) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    saverr = lo_change_cred(req, &old);
+    if (saverr) {
+        goto out;
+    }
+
+    res = mknod_wrapper(dir->fd, name, link, mode, rdev);
+
+    /* Capture errno now; restoring credentials may overwrite it */
+    saverr = errno;
+
+    lo_restore_cred(&old);
+
+    if (res == -1) {
+        goto out;
+    }
+
+    saverr = lo_do_lookup(req, parent, name, &e);
+    if (saverr) {
+        goto out;
+    }
+
+    fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
+             name, (unsigned long long)e.ino);
+
+    fuse_reply_entry(req, &e);
+    lo_inode_put(lo, &dir);
+    return;
+
+out:
+    lo_inode_put(lo, &dir);
+    fuse_reply_err(req, saverr);
+}
+
+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
+                     mode_t mode, dev_t rdev)
+{
+    lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
+}
+
+static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
+                     mode_t mode)
+{
+    lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
+}
+
+static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
+                       const char *name)
+{
+    lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
+}
+
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
+                                 int dfd, const char *name)
+{
+    struct lo_inode *dir;
+    char src[PATH_MAX];
+    int ret;
+
+    if (!inode->is_symlink) {
+        /* Name the source by fd number relative to lo->proc_self_fd */
+        sprintf(src, "%i", inode->fd);
+        return linkat(lo->proc_self_fd, src, dfd, name, AT_SYMLINK_FOLLOW);
+    }
+
+    ret = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
+    if (ret != -1 || (errno != ENOENT && errno != EINVAL)) {
+        return ret;
+    }
+
+    /* Sorry, no race free way to hard-link a symlink. */
+    if (lo->norace) {
+        errno = EPERM;
+        return ret;
+    }
+
+    /* Racy fallback: link by name relative to the parent directory */
+    ret = lo_parent_and_name(lo, inode, src, &dir);
+    if (ret != -1) {
+        ret = linkat(dir->fd, src, dfd, name, 0);
+        unref_inode_lolocked(lo, dir, 1);
+        lo_inode_put(lo, &dir);
+    }
+
+    return ret;
+}
+
+static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
+                    const char *name)
+{
+    int res;
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *parent_inode;
+    struct lo_inode *inode;
+    struct fuse_entry_param e;
+    int saverr;
+
+    if (!is_safe_path_component(name)) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    parent_inode = lo_inode(req, parent);
+    inode = lo_inode(req, ino);
+    if (!parent_inode || !inode) {
+        errno = EBADF;
+        goto out_err;
+    }
+
+    memset(&e, 0, sizeof(struct fuse_entry_param));
+    e.attr_timeout = lo->timeout;
+    e.entry_timeout = lo->timeout;
+
+    res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name);
+    if (res == -1) {
+        goto out_err;
+    }
+
+    res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        goto out_err;
+    }
+
+    pthread_mutex_lock(&lo->mutex);
+    inode->nlookup++;
+    pthread_mutex_unlock(&lo->mutex);
+    e.ino = inode->fuse_ino;
+
+    fuse_log(FUSE_LOG_DEBUG, "  %lli/%s -> %lli\n", (unsigned long long)parent,
+             name, (unsigned long long)e.ino);
+
+    fuse_reply_entry(req, &e);
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &inode);
+    return;
+
+out_err:
+    saverr = errno;
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, saverr);
+}
+
+/* Increments nlookup and caller must release refcount using lo_inode_put() */
+static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
+                                    const char *name)
+{
+    int res;
+    struct stat attr;
+
+    res = fstatat(lo_fd(req, parent), name, &attr,
+                  AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        return NULL;
+    }
+
+    return lo_find(lo_data(req), &attr);
+}
+
+static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+    int res;
+    struct lo_inode *inode;
+    struct lo_data *lo = lo_data(req);
+
+    if (!is_safe_path_component(name)) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    inode = lookup_name(req, parent, name);
+    if (!inode) {
+        fuse_reply_err(req, EIO);
+        return;
+    }
+
+    res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
+
+    fuse_reply_err(req, res == -1 ? errno : 0);
+    unref_inode_lolocked(lo, inode, 1);
+    lo_inode_put(lo, &inode);
+}
+
+static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
+                      fuse_ino_t newparent, const char *newname,
+                      unsigned int flags)
+{
+    int res;
+    struct lo_inode *parent_inode;
+    struct lo_inode *newparent_inode;
+    struct lo_inode *oldinode = NULL;
+    struct lo_inode *newinode = NULL;
+    struct lo_data *lo = lo_data(req);
+
+    if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    parent_inode = lo_inode(req, parent);
+    newparent_inode = lo_inode(req, newparent);
+    if (!parent_inode || !newparent_inode) {
+        fuse_reply_err(req, EBADF);
+        goto out;
+    }
+
+    oldinode = lookup_name(req, parent, name);
+    newinode = lookup_name(req, newparent, newname);
+
+    if (!oldinode) {
+        fuse_reply_err(req, EIO);
+        goto out;
+    }
+
+    if (flags) {
+#ifndef SYS_renameat2
+        fuse_reply_err(req, EINVAL);
+#else
+        res = syscall(SYS_renameat2, parent_inode->fd, name,
+                        newparent_inode->fd, newname, flags);
+        if (res == -1 && errno == ENOSYS) {
+            fuse_reply_err(req, EINVAL);
+        } else {
+            fuse_reply_err(req, res == -1 ? errno : 0);
+        }
+#endif
+        goto out;
+    }
+
+    res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
+
+    fuse_reply_err(req, res == -1 ? errno : 0);
+out:
+    unref_inode_lolocked(lo, oldinode, 1);
+    unref_inode_lolocked(lo, newinode, 1);
+    lo_inode_put(lo, &oldinode);
+    lo_inode_put(lo, &newinode);
+    lo_inode_put(lo, &parent_inode);
+    lo_inode_put(lo, &newparent_inode);
+}
+
+static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+    int res;
+    struct lo_inode *inode;
+    struct lo_data *lo = lo_data(req);
+
+    if (!is_safe_path_component(name)) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    inode = lookup_name(req, parent, name);
+    if (!inode) {
+        fuse_reply_err(req, EIO);
+        return;
+    }
+
+    res = unlinkat(lo_fd(req, parent), name, 0);
+
+    fuse_reply_err(req, res == -1 ? errno : 0);
+    unref_inode_lolocked(lo, inode, 1);
+    lo_inode_put(lo, &inode);
+}
+
+/* To be called with lo->mutex held */
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
+{
+    if (!inode) {
+        return;
+    }
+
+    assert(inode->nlookup >= n);
+    inode->nlookup -= n;
+    if (!inode->nlookup) {
+        lo_map_remove(&lo->ino_map, inode->fuse_ino);
+        g_hash_table_remove(lo->inodes, &inode->key);
+        if (g_hash_table_size(inode->posix_locks)) {
+            fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
+        }
+        g_hash_table_destroy(inode->posix_locks);
+        pthread_mutex_destroy(&inode->plock_mutex);
+
+        /* Drop our refcount from lo_do_lookup() */
+        lo_inode_put(lo, &inode);
+    }
+}
+
+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
+                                 uint64_t n)
+{
+    if (!inode) {
+        return;
+    }
+
+    pthread_mutex_lock(&lo->mutex);
+    unref_inode(lo, inode, n);
+    pthread_mutex_unlock(&lo->mutex);
+}
+
+static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode;
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        return;
+    }
+
+    fuse_log(FUSE_LOG_DEBUG, "  forget %lli %lli -%lli\n",
+             (unsigned long long)ino, (unsigned long long)inode->nlookup,
+             (unsigned long long)nlookup);
+
+    unref_inode_lolocked(lo, inode, nlookup);
+    lo_inode_put(lo, &inode);
+}
+
+static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
+{
+    lo_forget_one(req, ino, nlookup);
+    fuse_reply_none(req);
+}
+
+static void lo_forget_multi(fuse_req_t req, size_t count,
+                            struct fuse_forget_data *forgets)
+{
+    size_t i; /* same type as count: avoids a signed/unsigned comparison */
+
+    for (i = 0; i < count; i++) {
+        lo_forget_one(req, forgets[i].ino, forgets[i].nlookup);
+    }
+    fuse_reply_none(req);
+}
+
+static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
+{
+    char target[PATH_MAX + 1];
+    int len = readlinkat(lo_fd(req, ino), "", target, sizeof(target));
+
+    if (len == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+    if (len == sizeof(target)) {
+        /* Buffer completely filled: the target may have been truncated */
+        fuse_reply_err(req, ENAMETOOLONG);
+        return;
+    }
+    /* readlinkat() does not append a terminating NUL */
+    target[len] = '\0';
+    fuse_reply_readlink(req, target);
+}
+
+struct lo_dirp {
+    gint refcount;
+    DIR *dp;
+    struct dirent *entry;
+    off_t offset;
+};
+
+static void lo_dirp_put(struct lo_dirp **dp)
+{
+    struct lo_dirp *dirp = *dp;
+
+    if (dirp != NULL) {
+        /* The caller's pointer is cleared before the reference is dropped */
+        *dp = NULL;
+        if (g_atomic_int_dec_and_test(&dirp->refcount)) {
+            /* Last reference gone: close the stream and free the wrapper */
+            closedir(dirp->dp);
+            free(dirp);
+        }
+    }
+}
+
+/* Call lo_dirp_put() on the return value when no longer needed */
+static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_map_elem *elem;
+    struct lo_dirp *d = NULL; /* returned when fi->fh is not in dirp_map */
+
+    pthread_mutex_lock(&lo->mutex);
+    elem = lo_map_get(&lo->dirp_map, fi->fh);
+    if (elem) {
+        /* Read elem only under lo->mutex; it may change once unlocked */
+        d = elem->dirp;
+        g_atomic_int_inc(&d->refcount);
+    }
+    pthread_mutex_unlock(&lo->mutex);
+
+    return d;
+}
+
+/* Open directory @ino; reply with a dirp handle in fi->fh, or an errno. */
+static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
+                       struct fuse_file_info *fi)
+{
+    int error = ENOMEM;
+    struct lo_data *lo = lo_data(req);
+    struct lo_dirp *d = calloc(1, sizeof(*d));
+    int fd = -1;
+    ssize_t fh;
+
+    if (d == NULL) {
+        goto out_err;
+    }
+
+    fd = openat(lo_fd(req, ino), ".", O_RDONLY);
+    if (fd == -1) {
+        goto out_errno;
+    }
+
+    d->dp = fdopendir(fd);
+    if (d->dp == NULL) {
+        goto out_errno;
+    }
+
+    d->offset = 0;
+    d->entry = NULL;
+
+    g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */
+    pthread_mutex_lock(&lo->mutex);
+    fh = lo_add_dirp_mapping(req, d);
+    pthread_mutex_unlock(&lo->mutex);
+    if (fh == -1) {
+        goto out_err;
+    }
+
+    fi->fh = fh;
+    if (lo->cache == CACHE_ALWAYS) {
+        fi->cache_readdir = 1;
+    }
+    fuse_reply_open(req, fi);
+    return;
+
+out_errno:
+    error = errno;
+out_err:
+    if (d) {
+        if (d->dp) {
+            /* closedir() closes fd too; don't close it twice */
+            closedir(d->dp);
+        } else if (fd != -1) {
+            close(fd);
+        }
+        free(d);
+    }
+    fuse_reply_err(req, error);
+}
+
+static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
+                          off_t offset, struct fuse_file_info *fi, int plus)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_dirp *d = NULL;
+    struct lo_inode *dinode;
+    char *buf = NULL;
+    char *p;
+    size_t rem = size;
+    int err = EBADF;
+
+    dinode = lo_inode(req, ino);
+    if (!dinode) {
+        goto error;
+    }
+
+    d = lo_dirp(req, fi);
+    if (!d) {
+        goto error;
+    }
+
+    err = ENOMEM;
+    buf = calloc(1, size);
+    if (!buf) {
+        goto error;
+    }
+    p = buf;
+
+    if (offset != d->offset) {
+        seekdir(d->dp, offset);
+        d->entry = NULL;
+        d->offset = offset;
+    }
+    while (1) {
+        size_t entsize;
+        off_t nextoff;
+        const char *name;
+
+        if (!d->entry) {
+            errno = 0;
+            d->entry = readdir(d->dp);
+            if (!d->entry) {
+                if (errno) { /* Error */
+                    err = errno;
+                    goto error;
+                } else { /* End of stream */
+                    break;
+                }
+            }
+        }
+        nextoff = d->entry->d_off;
+        name = d->entry->d_name;
+
+        fuse_ino_t entry_ino = 0;
+        struct fuse_entry_param e = (struct fuse_entry_param){
+            .attr.st_ino = d->entry->d_ino,
+            .attr.st_mode = d->entry->d_type << 12,
+        };
+
+        /* Hide root's parent directory */
+        if (dinode == &lo->root && strcmp(name, "..") == 0) {
+            e.attr.st_ino = lo->root.key.ino;
+            e.attr.st_mode = DT_DIR << 12;
+        }
+
+        if (plus) {
+            if (!is_dot_or_dotdot(name)) {
+                err = lo_do_lookup(req, ino, name, &e);
+                if (err) {
+                    goto error;
+                }
+                entry_ino = e.ino;
+            }
+
+            entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
+        } else {
+            entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
+        }
+        if (entsize > rem) {
+            if (entry_ino != 0) {
+                lo_forget_one(req, entry_ino, 1);
+            }
+            break;
+        }
+
+        p += entsize;
+        rem -= entsize;
+
+        d->entry = NULL;
+        d->offset = nextoff;
+    }
+
+    err = 0;
+error:
+    lo_dirp_put(&d);
+    lo_inode_put(lo, &dinode);
+
+    /*
+     * If there's an error, we can only signal it if we haven't stored
+     * any entries yet - otherwise we'd end up with wrong lookup
+     * counts for the entries that are already in the buffer. So we
+     * return what we've collected until that point.
+     */
+    if (err && rem == size) {
+        fuse_reply_err(req, err);
+    } else {
+        fuse_reply_buf(req, buf, size - rem);
+    }
+    free(buf);
+}
+
+static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
+                       off_t offset, struct fuse_file_info *fi)
+{
+    lo_do_readdir(req, ino, size, offset, fi, 0);
+}
+
+static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
+                           off_t offset, struct fuse_file_info *fi)
+{
+    lo_do_readdir(req, ino, size, offset, fi, 1);
+}
+
+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
+                          struct fuse_file_info *fi)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_map_elem *slot;
+    struct lo_dirp *dirp = NULL;
+
+    (void)ino;
+
+    /* Detach the handle from dirp_map while holding lo->mutex */
+    pthread_mutex_lock(&lo->mutex);
+    slot = lo_map_get(&lo->dirp_map, fi->fh);
+    if (slot) {
+        dirp = slot->dirp;
+        lo_map_remove(&lo->dirp_map, fi->fh);
+    }
+    pthread_mutex_unlock(&lo->mutex);
+
+    if (!slot) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+    lo_dirp_put(&dirp); /* paired with lo_opendir() */
+    fuse_reply_err(req, 0);
+}
+
+static void update_open_flags(int writeback, struct fuse_file_info *fi)
+{
+    /*
+     * With writeback cache, kernel may send read requests even
+     * when userspace opened write-only
+     */
+    if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
+        fi->flags &= ~O_ACCMODE;
+        fi->flags |= O_RDWR;
+    }
+
+    /*
+     * With writeback cache, O_APPEND is handled by the kernel.
+     * This breaks atomicity (since the file may change in the
+     * underlying filesystem, so that the kernel's idea of the
+     * end of the file isn't accurate anymore). In this example,
+     * we just accept that. A more rigorous filesystem may want
+     * to return an error here
+     */
+    if (writeback && (fi->flags & O_APPEND)) {
+        fi->flags &= ~O_APPEND;
+    }
+
+    /*
+     * O_DIRECT in guest should not necessarily mean bypassing page
+     * cache on host as well. If somebody needs that behavior, it
+     * probably should be a configuration knob in daemon.
+     */
+    fi->flags &= ~O_DIRECT;
+}
+
+static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
+                      mode_t mode, struct fuse_file_info *fi)
+{
+    int fd;
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *parent_inode;
+    struct fuse_entry_param e;
+    int err;
+    struct lo_cred old = {};
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent,
+             name);
+
+    if (!is_safe_path_component(name)) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    parent_inode = lo_inode(req, parent);
+    if (!parent_inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    err = lo_change_cred(req, &old);
+    if (err) {
+        goto out;
+    }
+
+    update_open_flags(lo->writeback, fi);
+
+    fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
+                mode);
+    err = fd == -1 ? errno : 0;
+    lo_restore_cred(&old);
+
+    if (!err) {
+        ssize_t fh;
+
+        pthread_mutex_lock(&lo->mutex);
+        fh = lo_add_fd_mapping(req, fd);
+        pthread_mutex_unlock(&lo->mutex);
+        if (fh == -1) {
+            close(fd);
+            err = ENOMEM;
+            goto out;
+        }
+
+        fi->fh = fh;
+        err = lo_do_lookup(req, parent, name, &e);
+    }
+    if (lo->cache == CACHE_NONE) {
+        fi->direct_io = 1;
+    } else if (lo->cache == CACHE_ALWAYS) {
+        fi->keep_cache = 1;
+    }
+
+out:
+    lo_inode_put(lo, &parent_inode);
+
+    if (err) {
+        fuse_reply_err(req, err);
+    } else {
+        fuse_reply_create(req, &e, fi);
+    }
+}
+
+/* Should be called with inode->plock_mutex held */
+static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
+                                                      struct lo_inode *inode,
+                                                      uint64_t lock_owner,
+                                                      pid_t pid, int *err)
+{
+    struct lo_inode_plock *plock;
+    char procname[64];
+    int fd;
+
+    plock =
+        g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner));
+
+    if (plock) {
+        return plock;
+    }
+
+    plock = malloc(sizeof(struct lo_inode_plock));
+    if (!plock) {
+        *err = ENOMEM;
+        return NULL;
+    }
+
+    /* Open another instance of file which can be used for ofd locks. */
+    sprintf(procname, "%i", inode->fd);
+
+    /* TODO: What if file is not writable? */
+    fd = openat(lo->proc_self_fd, procname, O_RDWR);
+    if (fd == -1) {
+        *err = errno;
+        free(plock);
+        return NULL;
+    }
+
+    plock->lock_owner = lock_owner;
+    plock->fd = fd;
+    g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner),
+                        plock);
+    return plock;
+}
+
+/* Report a lock that would conflict with @lock, using the owner's OFD fd. */
+static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                     struct flock *lock)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode;
+    struct lo_inode_plock *plock;
+    int ret, saverr = 0;
+
+    fuse_log(FUSE_LOG_DEBUG,
+             "lo_getlk(ino=%" PRIu64 ", flags=%d) owner=0x%" PRIx64
+             ", l_type=%d l_start=0x%" PRIx64 " l_len=0x%" PRIx64 "\n",
+             ino, fi->flags, fi->lock_owner, lock->l_type,
+             (uint64_t)lock->l_start, (uint64_t)lock->l_len);
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    pthread_mutex_lock(&inode->plock_mutex);
+    plock =
+        lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
+    if (!plock) {
+        saverr = ret;
+        goto out;
+    }
+
+    ret = fcntl(plock->fd, F_OFD_GETLK, lock);
+    if (ret == -1) {
+        saverr = errno;
+    }
+
+out:
+    pthread_mutex_unlock(&inode->plock_mutex);
+    lo_inode_put(lo, &inode);
+
+    if (saverr) {
+        fuse_reply_err(req, saverr);
+    } else {
+        fuse_reply_lock(req, lock);
+    }
+}
+
+/*
+ * FUSE setlk handler: acquire or release a POSIX lock on @ino.
+ *
+ * Blocking requests (@sleep != 0) are rejected with EOPNOTSUPP.  Everything
+ * else is applied with F_OFD_SETLK on the file descriptor that belongs to
+ * fi->lock_owner, and the resulting errno value (0 on success) is returned
+ * to the client.
+ */
+static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                     struct flock *lock, int sleep)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode;
+    struct lo_inode_plock *ctx;
+    int ctx_err;
+    int err = 0;
+
+    fuse_log(FUSE_LOG_DEBUG,
+             "lo_setlk(ino=%" PRIu64 ", flags=%d)"
+             " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d"
+             " l_start=0x%lx l_len=0x%lx\n",
+             ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep,
+             lock->l_whence, lock->l_start, lock->l_len);
+
+    if (sleep != 0) {
+        fuse_reply_err(req, EOPNOTSUPP);
+        return;
+    }
+
+    inode = lo_inode(req, ino);
+    if (inode == NULL) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    /* The per-owner lock table is protected by plock_mutex */
+    pthread_mutex_lock(&inode->plock_mutex);
+    ctx = lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid,
+                                  &ctx_err);
+    if (ctx == NULL) {
+        err = ctx_err;
+    } else {
+        /* TODO: Is it alright to modify flock? */
+        lock->l_pid = 0;
+        if (fcntl(ctx->fd, F_OFD_SETLK, lock) == -1) {
+            err = errno;
+        }
+    }
+    pthread_mutex_unlock(&inode->plock_mutex);
+    lo_inode_put(lo, &inode);
+
+    fuse_reply_err(req, err);
+}
+
+/*
+ * FUSE fsyncdir handler: flush the directory opened as fi->fh to stable
+ * storage.
+ *
+ * @datasync selects fdatasync() instead of fsync().  Replies EBADF if the
+ * directory handle cannot be resolved, otherwise the errno value of the sync
+ * call (0 on success).
+ */
+static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
+                        struct fuse_file_info *fi)
+{
+    int res;
+    int saverr = 0;
+    struct lo_dirp *d;
+    int fd;
+
+    (void)ino;
+
+    d = lo_dirp(req, fi);
+    if (!d) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    fd = dirfd(d->dp);
+    if (datasync) {
+        res = fdatasync(fd);
+    } else {
+        res = fsync(fd);
+    }
+    /*
+     * Capture errno right away: dropping the directory reference below may
+     * call into libc and overwrite it before the reply is built.
+     */
+    if (res == -1) {
+        saverr = errno;
+    }
+
+    lo_dirp_put(&d);
+
+    fuse_reply_err(req, saverr);
+}
+
+/*
+ * FUSE open handler: open @ino with the (adjusted) flags from @fi and hand a
+ * new file handle back to the client.
+ *
+ * The inode is re-opened through lo->proc_self_fd with O_NOFOLLOW masked
+ * out.  Replies with the errno of openat() on failure, or ENOMEM if no
+ * handle could be allocated in the fd map.
+ */
+static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
+{
+    struct lo_data *lo = lo_data(req);
+    char procname[64];
+    ssize_t handle;
+    int newfd;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
+             fi->flags);
+
+    update_open_flags(lo->writeback, fi);
+
+    snprintf(procname, sizeof(procname), "%i", lo_fd(req, ino));
+    newfd = openat(lo->proc_self_fd, procname, fi->flags & ~O_NOFOLLOW);
+    if (newfd == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+
+    /* The fd map is shared between requests and guarded by lo->mutex */
+    pthread_mutex_lock(&lo->mutex);
+    handle = lo_add_fd_mapping(req, newfd);
+    pthread_mutex_unlock(&lo->mutex);
+    if (handle == -1) {
+        close(newfd);
+        fuse_reply_err(req, ENOMEM);
+        return;
+    }
+
+    fi->fh = handle;
+    /* Translate the configured cache mode into per-open hints */
+    if (lo->cache == CACHE_ALWAYS) {
+        fi->keep_cache = 1;
+    } else if (lo->cache == CACHE_NONE) {
+        fi->direct_io = 1;
+    }
+    fuse_reply_open(req, fi);
+}
+
+/*
+ * FUSE release handler: drop the fd-map entry for fi->fh and close the file
+ * descriptor it referred to.  Always replies with success.
+ */
+static void lo_release(fuse_req_t req, fuse_ino_t ino,
+                       struct fuse_file_info *fi)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_map_elem *entry;
+    int fd = -1;
+
+    (void)ino;
+
+    /* Look up and unlink the handle atomically with respect to lo->mutex */
+    pthread_mutex_lock(&lo->mutex);
+    entry = lo_map_get(&lo->fd_map, fi->fh);
+    if (entry != NULL) {
+        fd = entry->fd;
+        lo_map_remove(&lo->fd_map, fi->fh);
+    }
+    pthread_mutex_unlock(&lo->mutex);
+
+    /* fd is still -1 here if the handle was unknown */
+    close(fd);
+    fuse_reply_err(req, 0);
+}
+
+/*
+ * FUSE flush handler, called when a client file descriptor for @ino is
+ * closed.
+ *
+ * Drops the POSIX-lock context registered for fi->lock_owner and then flushes
+ * the file by closing a duplicate of its descriptor.  Replies EBADF if the
+ * inode is unknown, otherwise the errno value of the close (0 on success).
+ */
+static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
+{
+    struct lo_inode *inode;
+    int saverr = 0;
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    /* An fd is going away. Cleanup associated posix locks */
+    pthread_mutex_lock(&inode->plock_mutex);
+    g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner));
+    pthread_mutex_unlock(&inode->plock_mutex);
+
+    /*
+     * Record errno before releasing the inode reference: lo_inode_put() may
+     * call into libc and overwrite it.
+     */
+    if (close(dup(lo_fi_fd(req, fi))) == -1) {
+        saverr = errno;
+    }
+    lo_inode_put(lo_data(req), &inode);
+    fuse_reply_err(req, saverr);
+}
+
+/*
+ * FUSE fsync handler: flush @ino to stable storage.
+ *
+ * When @fi is NULL there is no open handle, so the inode is temporarily
+ * re-opened through lo->proc_self_fd and closed again afterwards; otherwise
+ * the descriptor behind fi->fh is used.  @datasync selects fdatasync()
+ * instead of fsync().  Replies with the errno value of the failing call, or
+ * 0 on success.
+ */
+static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
+                     struct fuse_file_info *fi)
+{
+    char procname[64];
+    int saverr = 0;
+    int res;
+    int fd;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
+             (void *)fi);
+
+    if (!fi) {
+        struct lo_data *lo = lo_data(req);
+
+        /*
+         * A stack buffer (as in the other handlers) avoids a heap allocation
+         * whose failure or release could disturb errno before it is reported.
+         */
+        snprintf(procname, sizeof(procname), "%i", lo_fd(req, ino));
+
+        fd = openat(lo->proc_self_fd, procname, O_RDWR);
+        if (fd == -1) {
+            fuse_reply_err(req, errno);
+            return;
+        }
+    } else {
+        fd = lo_fi_fd(req, fi);
+    }
+
+    if (datasync) {
+        res = fdatasync(fd);
+    } else {
+        res = fsync(fd);
+    }
+    /* Save errno now; the close() below may overwrite it */
+    if (res == -1) {
+        saverr = errno;
+    }
+    if (!fi) {
+        close(fd);
+    }
+    fuse_reply_err(req, saverr);
+}
+
+/*
+ * FUSE read handler: reply with up to @size bytes of the file behind fi->fh,
+ * starting at @offset.
+ *
+ * No data is copied here; the reply is described as an fd-backed buffer and
+ * fuse_reply_data() performs the transfer.
+ */
+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
+                    struct fuse_file_info *fi)
+{
+    struct fuse_bufvec src = FUSE_BUFVEC_INIT(size);
+    struct fuse_buf *seg = &src.buf[0];
+
+    fuse_log(FUSE_LOG_DEBUG,
+             "lo_read(ino=%" PRIu64 ", size=%zd, "
+             "off=%lu)\n",
+             ino, size, (unsigned long)offset);
+
+    seg->flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
+    seg->fd = lo_fi_fd(req, fi);
+    seg->pos = offset;
+
+    fuse_reply_data(req, &src);
+}
+
+/*
+ * FUSE write_buf handler: copy the data described by @in_buf into the file
+ * behind fi->fh at offset @off.
+ *
+ * Replies with the number of bytes written, or with an errno value if the
+ * copy (or dropping CAP_FSETID) fails.
+ */
+static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
+                         struct fuse_bufvec *in_buf, off_t off,
+                         struct fuse_file_info *fi)
+{
+    (void)ino;
+    ssize_t res;
+    struct fuse_bufvec dst = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
+    bool fsetid_dropped = false;
+
+    dst.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
+    dst.buf[0].fd = lo_fi_fd(req, fi);
+    dst.buf[0].pos = off;
+
+    fuse_log(FUSE_LOG_DEBUG,
+             "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
+             dst.buf[0].size, (unsigned long)off);
+
+    /*
+     * If kill_priv is set, drop CAP_FSETID which should lead to kernel
+     * clearing setuid/setgid on file.
+     */
+    if (fi->kill_priv) {
+        res = drop_effective_cap("FSETID", &fsetid_dropped);
+        if (res != 0) {
+            fuse_reply_err(req, res);
+            return;
+        }
+    }
+
+    res = fuse_buf_copy(&dst, in_buf);
+    if (res >= 0) {
+        fuse_reply_write(req, (size_t)res);
+    } else {
+        /* fuse_buf_copy() reports failures as a negated errno value */
+        fuse_reply_err(req, -res);
+    }
+
+    /* Restore the capability only if this request actually dropped it */
+    if (fsetid_dropped) {
+        res = gain_effective_cap("FSETID");
+        if (res) {
+            fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
+        }
+    }
+}
+
+/*
+ * FUSE statfs handler: report statistics of the filesystem that holds @ino,
+ * or the errno value of fstatvfs() on failure.
+ */
+static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
+{
+    struct statvfs fsinfo;
+
+    if (fstatvfs(lo_fd(req, ino), &fsinfo) == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+
+    fuse_reply_statfs(req, &fsinfo);
+}
+
+/*
+ * FUSE fallocate handler: manipulate the allocated space of the file behind
+ * fi->fh.
+ *
+ * Uses fallocate() when CONFIG_FALLOCATE is defined, falls back to
+ * posix_fallocate() (mode 0 only) when CONFIG_POSIX_FALLOCATE is defined,
+ * and replies EOPNOTSUPP when neither is available.
+ */
+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
+                         off_t length, struct fuse_file_info *fi)
+{
+    int err = EOPNOTSUPP;
+    (void)ino;
+
+#ifdef CONFIG_FALLOCATE
+    /* fallocate() reports failure through errno */
+    err = fallocate(lo_fi_fd(req, fi), mode, offset, length);
+    err = (err < 0) ? errno : err;
+
+#elif defined(CONFIG_POSIX_FALLOCATE)
+    /* posix_fallocate() has no mode argument and returns the error number */
+    if (mode != 0) {
+        fuse_reply_err(req, EOPNOTSUPP);
+        return;
+    }
+
+    err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
+#endif
+
+    fuse_reply_err(req, err);
+}
+
+/*
+ * FUSE flock handler: apply the BSD lock operation @op to the file behind
+ * fi->fh and reply with the resulting errno value (0 on success).
+ */
+static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+                     int op)
+{
+    int err = 0;
+    (void)ino;
+
+    if (flock(lo_fi_fd(req, fi), op) == -1) {
+        err = errno;
+    }
+
+    fuse_reply_err(req, err);
+}
+
+/*
+ * FUSE getxattr handler: read extended attribute @name of @ino.
+ *
+ * With @size != 0 the value is read into a buffer of @size bytes and sent
+ * with fuse_reply_buf(); with @size == 0 only the length reported by
+ * fgetxattr() is sent with fuse_reply_xattr().
+ *
+ * Error replies: EBADF if the inode is unknown, ENOSYS if xattr support is
+ * disabled in lo_data, EPERM for symlinks, otherwise the errno of the
+ * failing call.
+ */
+static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
+                        size_t size)
+{
+    struct lo_data *lo = lo_data(req);
+    char *value = NULL;
+    char procname[64];
+    struct lo_inode *inode;
+    ssize_t ret;
+    int saverr;
+    int fd = -1;
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    saverr = ENOSYS;
+    if (!lo_data(req)->xattr) {
+        goto out;
+    }
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
+             ino, name, size);
+
+    if (inode->is_symlink) {
+        /* Sorry, no race free way to getxattr on symlink. */
+        saverr = EPERM;
+        goto out;
+    }
+
+    /* Re-open the inode through lo->proc_self_fd to get a usable fd */
+    sprintf(procname, "%i", inode->fd);
+    fd = openat(lo->proc_self_fd, procname, O_RDONLY);
+    if (fd < 0) {
+        goto out_err;
+    }
+
+    if (size) {
+        value = malloc(size);
+        if (!value) {
+            /* out_err reports errno, so this relies on malloc() setting it */
+            goto out_err;
+        }
+
+        ret = fgetxattr(fd, name, value, size);
+        if (ret == -1) {
+            goto out_err;
+        }
+        saverr = 0;
+        if (ret == 0) {
+            /* Empty value: answered with an error reply of 0, not a buffer */
+            goto out;
+        }
+
+        fuse_reply_buf(req, value, ret);
+    } else {
+        /* Size probe: only the required length is reported */
+        ret = fgetxattr(fd, name, NULL, 0);
+        if (ret == -1) {
+            goto out_err;
+        }
+
+        fuse_reply_xattr(req, ret);
+    }
+    /* Common cleanup; the error labels below jump back up to here */
+out_free:
+    free(value);
+
+    if (fd >= 0) {
+        close(fd);
+    }
+
+    lo_inode_put(lo, &inode);
+    return;
+
+out_err:
+    saverr = errno;
+out:
+    /*
+     * NOTE(review): lo_inode_put() is called here and again after the jump
+     * to out_free. Presumably the first call clears `inode` so the second is
+     * a no-op -- confirm against lo_inode_put().
+     */
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, saverr);
+    goto out_free;
+}
+
+/*
+ * FUSE listxattr handler: list the extended attribute names of @ino.
+ *
+ * With @size != 0 the name list is read into a buffer of @size bytes and
+ * sent with fuse_reply_buf(); with @size == 0 only the length reported by
+ * flistxattr() is sent with fuse_reply_xattr().
+ *
+ * Error replies: EBADF if the inode is unknown, ENOSYS if xattr support is
+ * disabled in lo_data, EPERM for symlinks, otherwise the errno of the
+ * failing call.
+ */
+static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
+{
+    struct lo_data *lo = lo_data(req);
+    char *value = NULL;
+    char procname[64];
+    struct lo_inode *inode;
+    ssize_t ret;
+    int saverr;
+    int fd = -1;
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    saverr = ENOSYS;
+    if (!lo_data(req)->xattr) {
+        goto out;
+    }
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
+             size);
+
+    if (inode->is_symlink) {
+        /* Sorry, no race free way to listxattr on symlink. */
+        saverr = EPERM;
+        goto out;
+    }
+
+    /* Re-open the inode through lo->proc_self_fd to get a usable fd */
+    sprintf(procname, "%i", inode->fd);
+    fd = openat(lo->proc_self_fd, procname, O_RDONLY);
+    if (fd < 0) {
+        goto out_err;
+    }
+
+    if (size) {
+        value = malloc(size);
+        if (!value) {
+            /* out_err reports errno, so this relies on malloc() setting it */
+            goto out_err;
+        }
+
+        ret = flistxattr(fd, value, size);
+        if (ret == -1) {
+            goto out_err;
+        }
+        saverr = 0;
+        if (ret == 0) {
+            /* Empty list: answered with an error reply of 0, not a buffer */
+            goto out;
+        }
+
+        fuse_reply_buf(req, value, ret);
+    } else {
+        /* Size probe: only the required length is reported */
+        ret = flistxattr(fd, NULL, 0);
+        if (ret == -1) {
+            goto out_err;
+        }
+
+        fuse_reply_xattr(req, ret);
+    }
+    /* Common cleanup; the error labels below jump back up to here */
+out_free:
+    free(value);
+
+    if (fd >= 0) {
+        close(fd);
+    }
+
+    lo_inode_put(lo, &inode);
+    return;
+
+out_err:
+    saverr = errno;
+out:
+    /*
+     * NOTE(review): lo_inode_put() is called here and again after the jump
+     * to out_free. Presumably the first call clears `inode` so the second is
+     * a no-op -- confirm against lo_inode_put().
+     */
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, saverr);
+    goto out_free;
+}
+
+/*
+ * FUSE setxattr handler: set extended attribute @name of @ino to the @size
+ * bytes at @value, honouring the setxattr @flags.
+ *
+ * Error replies: EBADF if the inode is unknown, ENOSYS if xattr support is
+ * disabled in lo_data, EPERM for symlinks, otherwise the errno of the
+ * failing call (0 on success).
+ */
+static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
+                        const char *value, size_t size, int flags)
+{
+    /* Largest xattr value Linux accepts (XATTR_SIZE_MAX); bounds the log */
+    const size_t max_logged = 65536;
+    char procname[64];
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode;
+    ssize_t ret;
+    int saverr;
+    int fd = -1;
+    int logged_len = (int)(size < max_logged ? size : max_logged);
+
+    inode = lo_inode(req, ino);
+    if (!inode) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    saverr = ENOSYS;
+    if (!lo_data(req)->xattr) {
+        goto out;
+    }
+
+    /*
+     * @value is an arbitrary byte buffer of @size bytes and need not be NUL
+     * terminated, so bound the read with a precision instead of a bare %s.
+     */
+    fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
+             ", name=%s value=%.*s size=%zd)\n", ino, name, logged_len, value,
+             size);
+
+    if (inode->is_symlink) {
+        /* Sorry, no race free way to setxattr on symlink. */
+        saverr = EPERM;
+        goto out;
+    }
+
+    /* Re-open the inode through lo->proc_self_fd to get a writable fd */
+    sprintf(procname, "%i", inode->fd);
+    fd = openat(lo->proc_self_fd, procname, O_RDWR);
+    if (fd < 0) {
+        saverr = errno;
+        goto out;
+    }
+
+    ret = fsetxattr(fd, name, value, size, flags);
+    saverr = ret == -1 ? errno : 0;
+
+out:
+    if (fd >= 0) {
+        close(fd);
+    }
+
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, saverr);
+}
+
+/*
+ * FUSE removexattr handler: delete extended attribute @name from @ino.
+ *
+ * Error replies: EBADF if the inode is unknown, ENOSYS if xattr support is
+ * disabled in lo_data, EPERM for symlinks, otherwise the errno of the
+ * failing call (0 on success).
+ */
+static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
+{
+    struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode;
+    char fd_name[64];
+    int err = ENOSYS;
+    int fd;
+
+    inode = lo_inode(req, ino);
+    if (inode == NULL) {
+        fuse_reply_err(req, EBADF);
+        return;
+    }
+
+    if (lo_data(req)->xattr) {
+        fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n",
+                 ino, name);
+
+        if (inode->is_symlink) {
+            /* Sorry, no race free way to removexattr on symlink. */
+            err = EPERM;
+        } else {
+            /* Re-open the inode through lo->proc_self_fd for the operation */
+            snprintf(fd_name, sizeof(fd_name), "%i", inode->fd);
+            fd = openat(lo->proc_self_fd, fd_name, O_RDWR);
+            if (fd < 0) {
+                err = errno;
+            } else {
+                err = (fremovexattr(fd, name) == -1) ? errno : 0;
+                close(fd);
+            }
+        }
+    }
+
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, err);
+}
+
+#ifdef HAVE_COPY_FILE_RANGE
+/*
+ * FUSE copy_file_range handler: copy @len bytes from the file behind
+ * fi_in->fh at @off_in to the file behind fi_out->fh at @off_out.
+ *
+ * Replies with the number of bytes copied, or the errno value of
+ * copy_file_range() on failure.
+ */
+static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
+                               struct fuse_file_info *fi_in, fuse_ino_t ino_out,
+                               off_t off_out, struct fuse_file_info *fi_out,
+                               size_t len, int flags)
+{
+    int src_fd = lo_fi_fd(req, fi_in);
+    int dst_fd = lo_fi_fd(req, fi_out);
+    ssize_t copied;
+
+    fuse_log(FUSE_LOG_DEBUG,
+             "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
+             "off=%lu, ino=%" PRIu64 "/fd=%d, "
+             "off=%lu, size=%zd, flags=0x%x)\n",
+             ino_in, src_fd, off_in, ino_out, dst_fd, off_out, len, flags);
+
+    copied = copy_file_range(src_fd, &off_in, dst_fd, &off_out, len, flags);
+    if (copied >= 0) {
+        fuse_reply_write(req, copied);
+        return;
+    }
+
+    fuse_reply_err(req, errno);
+}
+#endif
+
+/*
+ * FUSE lseek handler: reposition the offset of the file behind fi->fh and
+ * reply with the new offset, or with the errno value of lseek() on failure.
+ */
+static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
+                     struct fuse_file_info *fi)
+{
+    off_t pos;
+
+    (void)ino;
+    pos = lseek(lo_fi_fd(req, fi), off, whence);
+    if (pos == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+
+    fuse_reply_lseek(req, pos);
+}
+
+/*
+ * FUSE destroy handler: drop every lookup reference that is still held on
+ * the inodes in lo->inodes.
+ *
+ * The iterator is restarted for every inode instead of walking the table in
+ * one pass; presumably unref_inode() removes the entry from the table once
+ * its lookup count reaches zero, which is what lets the loop terminate --
+ * confirm against unref_inode().
+ */
+static void lo_destroy(void *userdata)
+{
+    struct lo_data *lo = userdata;
+    GHashTableIter iter;
+    gpointer key, value;
+
+    pthread_mutex_lock(&lo->mutex);
+    for (;;) {
+        struct lo_inode *victim;
+
+        g_hash_table_iter_init(&iter, lo->inodes);
+        if (!g_hash_table_iter_next(&iter, &key, &value)) {
+            break;
+        }
+
+        victim = value;
+        unref_inode(lo, victim, victim->nlookup);
+    }
+    pthread_mutex_unlock(&lo->mutex);
+}
+
+/*
+ * Table of FUSE low-level operation handlers implemented by this file.  It is
+ * handed to fuse_session_new() in main().  The copy_file_range handler is
+ * only registered when HAVE_COPY_FILE_RANGE is defined.
+ */
+static struct fuse_lowlevel_ops lo_oper = {
+    .init = lo_init,
+    .lookup = lo_lookup,
+    .mkdir = lo_mkdir,
+    .mknod = lo_mknod,
+    .symlink = lo_symlink,
+    .link = lo_link,
+    .unlink = lo_unlink,
+    .rmdir = lo_rmdir,
+    .rename = lo_rename,
+    .forget = lo_forget,
+    .forget_multi = lo_forget_multi,
+    .getattr = lo_getattr,
+    .setattr = lo_setattr,
+    .readlink = lo_readlink,
+    .opendir = lo_opendir,
+    .readdir = lo_readdir,
+    .readdirplus = lo_readdirplus,
+    .releasedir = lo_releasedir,
+    .fsyncdir = lo_fsyncdir,
+    .create = lo_create,
+    .getlk = lo_getlk,
+    .setlk = lo_setlk,
+    .open = lo_open,
+    .release = lo_release,
+    .flush = lo_flush,
+    .fsync = lo_fsync,
+    .read = lo_read,
+    .write_buf = lo_write_buf,
+    .statfs = lo_statfs,
+    .fallocate = lo_fallocate,
+    .flock = lo_flock,
+    .getxattr = lo_getxattr,
+    .listxattr = lo_listxattr,
+    .setxattr = lo_setxattr,
+    .removexattr = lo_removexattr,
+#ifdef HAVE_COPY_FILE_RANGE
+    .copy_file_range = lo_copy_file_range,
+#endif
+    .lseek = lo_lseek,
+    .destroy = lo_destroy,
+};
+
+/*
+ * Print the vhost-user.json backend program capabilities: a JSON object
+ * announcing backend type "fs" on stdout.
+ */
+static void print_capabilities(void)
+{
+    printf("{\n"
+           "  \"type\": \"fs\"\n"
+           "}\n");
+}
+
+/*
+ * Move to a new mount, net, and pid namespaces to isolate this process.
+ *
+ * The calling process forks: the parent only waits for the child and exits
+ * with its status, so this function returns in the child only.  On return
+ * lo->proc_self_fd refers to /proc/self/fd of the new pid namespace.  Any
+ * failure terminates the process with exit status 1.
+ */
+static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
+{
+    pid_t child;
+
+    /*
+     * Create a new pid namespace for *child* processes.  We'll have to
+     * fork in order to enter the new pid namespace.  A new mount namespace
+     * is also needed so that we can remount /proc for the new pid
+     * namespace.
+     *
+     * Our UNIX domain sockets have been created.  Now we can move to
+     * an empty network namespace to prevent TCP/IP and other network
+     * activity in case this process is compromised.
+     */
+    if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
+        fuse_log(FUSE_LOG_ERR,
+                 "unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET): %m\n");
+        exit(1);
+    }
+
+    child = fork();
+    if (child < 0) {
+        fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
+        exit(1);
+    }
+    if (child > 0) {
+        pid_t waited;
+        int wstatus = 0;
+
+        /* The parent waits for the child */
+        do {
+            waited = waitpid(child, &wstatus, 0);
+        } while (waited < 0 && errno == EINTR && !se->exited);
+
+        /* We were terminated by a signal, see fuse_signals.c */
+        if (se->exited) {
+            exit(0);
+        }
+
+        /* waitpid() failed, so wstatus was never filled in: don't decode it */
+        if (waited < 0) {
+            fuse_log(FUSE_LOG_ERR, "waitpid() failed: %m\n");
+            exit(1);
+        }
+
+        if (WIFEXITED(wstatus)) {
+            exit(WEXITSTATUS(wstatus));
+        }
+
+        exit(1);
+    }
+
+    /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
+    prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+    /*
+     * If the mounts have shared propagation then we want to opt out so our
+     * mount changes don't affect the parent mount namespace.
+     */
+    if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
+        fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
+        exit(1);
+    }
+
+    /* The child must remount /proc to use the new pid namespace */
+    if (mount("proc", "/proc", "proc",
+              MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
+        fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
+        exit(1);
+    }
+
+    /* Now we can get our /proc/self/fd directory file descriptor */
+    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
+    if (lo->proc_self_fd == -1) {
+        fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
+        exit(1);
+    }
+}
+
+/*
+ * Snapshot the capability state of the process into cap.saved and set up
+ * cap.mutex.  Individual threads restore this snapshot later; see
+ * load_capng.  Exits with status 1 if libcap-ng fails.
+ */
+static void setup_capng(void)
+{
+    void *snapshot;
+
+    /* Note this accesses /proc so has to happen before the sandbox */
+    if (capng_get_caps_process() != 0) {
+        fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n");
+        exit(1);
+    }
+
+    pthread_mutex_init(&cap.mutex, NULL);
+    pthread_mutex_lock(&cap.mutex);
+    snapshot = capng_save_state();
+    cap.saved = snapshot;
+    if (snapshot == NULL) {
+        fuse_log(FUSE_LOG_ERR, "capng_save_state\n");
+        exit(1);
+    }
+    pthread_mutex_unlock(&cap.mutex);
+}
+
+/*
+ * Undo setup_capng(): release the saved capability state and destroy
+ * cap.mutex.
+ */
+static void cleanup_capng(void)
+{
+    void *snapshot = cap.saved;
+
+    cap.saved = NULL;
+    free(snapshot);
+    pthread_mutex_destroy(&cap.mutex);
+}
+
+
+/*
+ * Make the source directory our root so symlinks cannot escape and no other
+ * files are accessible.  Assumes unshare(CLONE_NEWNS) was already called.
+ *
+ * @source is bind-mounted onto itself and then made the root of the mount
+ * namespace with pivot_root; the previous root is detached afterwards.  The
+ * steps depend on each other and must stay in this order.  Any failure
+ * terminates the process with exit status 1.
+ */
+static void setup_mounts(const char *source)
+{
+    int oldroot;
+    int newroot;
+
+    /* Bind-mount the shared directory onto itself */
+    if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
+        fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
+        exit(1);
+    }
+
+    /* This magic is based on lxc's lxc_pivot_root() */
+    /* Keep handles on both roots so we can fchdir() between them below */
+    oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+    if (oldroot < 0) {
+        fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
+        exit(1);
+    }
+
+    newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+    if (newroot < 0) {
+        fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
+        exit(1);
+    }
+
+    if (fchdir(newroot) < 0) {
+        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
+        exit(1);
+    }
+
+    /* With cwd at the new root, "." names both new_root and put_old */
+    if (syscall(__NR_pivot_root, ".", ".") < 0) {
+        fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
+        exit(1);
+    }
+
+    /* Go back to the old root so that "." below refers to it */
+    if (fchdir(oldroot) < 0) {
+        fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
+        exit(1);
+    }
+
+    /* Stop mount events propagating out of the old root before detaching */
+    if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
+        fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
+        exit(1);
+    }
+
+    /* Lazily unmount the old root */
+    if (umount2(".", MNT_DETACH) < 0) {
+        fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
+        exit(1);
+    }
+
+    /* Leave the working directory inside the new root */
+    if (fchdir(newroot) < 0) {
+        fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
+        exit(1);
+    }
+
+    close(newroot);
+    close(oldroot);
+}
+
+/*
+ * Lock down this process to prevent access to other processes or files outside
+ * source directory.  This reduces the impact of arbitrary code execution bugs.
+ *
+ * The three steps run in a fixed order: namespaces first (setup_mounts()
+ * assumes the new mount namespace already exists), then the root switch to
+ * lo->source, and finally the seccomp filter.  @enable_syslog is passed on
+ * to setup_seccomp().
+ */
+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
+                          bool enable_syslog)
+{
+    setup_namespaces(lo, se);
+    setup_mounts(lo->source);
+    setup_seccomp(enable_syslog);
+}
+
+/*
+ * Raise the maximum number of open file descriptors to 1000000 unless the
+ * soft limit is already at least that high.  An EPERM from setrlimit() is
+ * tolerated; any other failure terminates the process with exit status 1.
+ */
+static void setup_nofile_rlimit(void)
+{
+    const rlim_t wanted = 1000000;
+    struct rlimit limits;
+
+    if (getrlimit(RLIMIT_NOFILE, &limits) < 0) {
+        fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n");
+        exit(1);
+    }
+
+    if (limits.rlim_cur < wanted) {
+        limits.rlim_cur = wanted;
+        limits.rlim_max = wanted;
+
+        /* Ignore SELinux denials (EPERM), everything else is fatal */
+        if (setrlimit(RLIMIT_NOFILE, &limits) < 0 && errno != EPERM) {
+            fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n");
+            exit(1);
+        }
+    }
+}
+
+/*
+ * Log callback installed with fuse_set_log_func().
+ *
+ * Messages above current_log_level are dropped.  When the current level is
+ * FUSE_LOG_DEBUG every message is prefixed with the thread id (and, unless
+ * logging to syslog, a timestamp from get_clock()).  Output goes to syslog
+ * if use_syslog is set, otherwise to stderr.
+ */
+static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
+{
+    g_autofree char *prefixed = NULL;
+    int priority;
+
+    if (level > current_log_level) {
+        return;
+    }
+
+    if (current_log_level == FUSE_LOG_DEBUG) {
+        if (use_syslog) {
+            prefixed = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
+                                       fmt);
+        } else {
+            prefixed = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
+                                       get_clock(), syscall(__NR_gettid), fmt);
+        }
+        fmt = prefixed;
+    }
+
+    if (!use_syslog) {
+        vfprintf(stderr, fmt, ap);
+        return;
+    }
+
+    /* Map the FUSE log level onto the matching syslog priority */
+    switch (level) {
+    case FUSE_LOG_EMERG:
+        priority = LOG_EMERG;
+        break;
+    case FUSE_LOG_ALERT:
+        priority = LOG_ALERT;
+        break;
+    case FUSE_LOG_CRIT:
+        priority = LOG_CRIT;
+        break;
+    case FUSE_LOG_WARNING:
+        priority = LOG_WARNING;
+        break;
+    case FUSE_LOG_NOTICE:
+        priority = LOG_NOTICE;
+        break;
+    case FUSE_LOG_INFO:
+        priority = LOG_INFO;
+        break;
+    case FUSE_LOG_DEBUG:
+        priority = LOG_DEBUG;
+        break;
+    case FUSE_LOG_ERR:
+    default:
+        /* Unknown levels are reported as errors */
+        priority = LOG_ERR;
+        break;
+    }
+    vsyslog(priority, fmt, ap);
+}
+
+/*
+ * Initialise the root inode @root from the directory that is currently "/".
+ *
+ * Opens "/" with O_PATH, records its inode and device numbers as the lookup
+ * key and starts the inode with a lookup count and reference count of 2.
+ * Exits with status 1 if "/" cannot be opened or stat'ed.
+ */
+static void setup_root(struct lo_data *lo, struct lo_inode *root)
+{
+    struct stat st;
+    int rootfd;
+
+    rootfd = open("/", O_PATH);
+    if (rootfd == -1) {
+        fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source);
+        exit(1);
+    }
+
+    if (fstatat(rootfd, "", &st, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) == -1) {
+        fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source);
+        exit(1);
+    }
+
+    root->fd = rootfd;
+    root->is_symlink = false;
+    root->key.dev = st.st_dev;
+    root->key.ino = st.st_ino;
+    root->nlookup = 2;
+    g_atomic_int_set(&root->refcount, 2);
+}
+
+/*
+ * GHashTable hash function for struct lo_key: the sum of the inode and
+ * device numbers, each truncated to guint.
+ */
+static guint lo_key_hash(gconstpointer key)
+{
+    const struct lo_key *k = key;
+    guint ino_part = (guint)k->ino;
+    guint dev_part = (guint)k->dev;
+
+    return ino_part + dev_part;
+}
+
+/*
+ * GHashTable equality function for struct lo_key: two keys match when both
+ * the inode number and the device number are equal.
+ */
+static gboolean lo_key_equal(gconstpointer a, gconstpointer b)
+{
+    const struct lo_key *lhs = a;
+    const struct lo_key *rhs = b;
+
+    if (lhs->ino != rhs->ino) {
+        return 0;
+    }
+    return lhs->dev == rhs->dev;
+}
+
+/*
+ * Release everything owned by @lo: the inode hash table, the three handle
+ * maps, the /proc/self/fd and root file descriptors (when open) and the
+ * source path string.
+ */
+static void fuse_lo_data_cleanup(struct lo_data *lo)
+{
+    /* The table is only present once main() has created it */
+    if (lo->inodes != NULL) {
+        g_hash_table_destroy(lo->inodes);
+    }
+
+    lo_map_destroy(&lo->fd_map);
+    lo_map_destroy(&lo->dirp_map);
+    lo_map_destroy(&lo->ino_map);
+
+    /* Negative values mean "never opened" */
+    if (!(lo->proc_self_fd < 0)) {
+        close(lo->proc_self_fd);
+    }
+    if (!(lo->root.fd < 0)) {
+        close(lo->root.fd);
+    }
+
+    free(lo->source);
+}
+
+/*
+ * virtiofsd entry point.
+ *
+ * Parses the command line, validates the configuration, creates the FUSE
+ * session, sandboxes the process and then serves requests in virtio_loop()
+ * until it returns.  Returns 0 on success (including --help, --version and
+ * capability printing) and 1 on failure; several configuration and sandbox
+ * errors terminate the process directly with exit(1).
+ */
+int main(int argc, char *argv[])
+{
+    struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+    struct fuse_session *se;
+    struct fuse_cmdline_opts opts;
+    struct lo_data lo = {
+        .debug = 0,
+        .writeback = 0,
+        .posix_lock = 1,
+        .proc_self_fd = -1,
+    };
+    struct lo_map_elem *root_elem;
+    int ret = -1;
+
+    /* Don't mask creation mode, kernel already did that */
+    umask(0);
+
+    pthread_mutex_init(&lo.mutex, NULL);
+    lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal);
+    /* -1 marks the root fd as not yet opened for fuse_lo_data_cleanup() */
+    lo.root.fd = -1;
+    lo.root.fuse_ino = FUSE_ROOT_ID;
+    lo.cache = CACHE_AUTO;
+
+    /*
+     * Set up the ino map like this:
+     * [0] Reserved (will not be used)
+     * [1] Root inode
+     */
+    lo_map_init(&lo.ino_map);
+    lo_map_reserve(&lo.ino_map, 0)->in_use = false;
+    root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
+    root_elem->inode = &lo.root;
+
+    lo_map_init(&lo.dirp_map);
+    lo_map_init(&lo.fd_map);
+
+    /*
+     * Jumping to err_out1 is safe before `se` is assigned: that label only
+     * frees the argument vector and the lo_data contents.
+     */
+    if (fuse_parse_cmdline(&args, &opts) != 0) {
+        goto err_out1;
+    }
+    fuse_set_log_func(log_func);
+    use_syslog = opts.syslog;
+    if (use_syslog) {
+        openlog("virtiofsd", LOG_PID, LOG_DAEMON);
+    }
+
+    /* Informational modes: print and leave with a zero exit status */
+    if (opts.show_help) {
+        printf("usage: %s [options]\n\n", argv[0]);
+        fuse_cmdline_help();
+        printf("    -o source=PATH             shared directory tree\n");
+        fuse_lowlevel_help();
+        ret = 0;
+        goto err_out1;
+    } else if (opts.show_version) {
+        fuse_lowlevel_version();
+        ret = 0;
+        goto err_out1;
+    } else if (opts.print_capabilities) {
+        print_capabilities();
+        ret = 0;
+        goto err_out1;
+    }
+
+    /* Parse the passthrough-specific options into lo */
+    if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
+        goto err_out1;
+    }
+
+    /*
+     * log_level is 0 if not configured via cmd options (0 is LOG_EMERG,
+     * and we don't use this log level).
+     */
+    if (opts.log_level != 0) {
+        current_log_level = opts.log_level;
+    }
+    /* The debug option overrides any configured log level */
+    lo.debug = opts.debug;
+    if (lo.debug) {
+        current_log_level = FUSE_LOG_DEBUG;
+    }
+    /* The shared directory must exist and be a directory; default is "/" */
+    if (lo.source) {
+        struct stat stat;
+        int res;
+
+        res = lstat(lo.source, &stat);
+        if (res == -1) {
+            fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n",
+                     lo.source);
+            exit(1);
+        }
+        if (!S_ISDIR(stat.st_mode)) {
+            fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
+            exit(1);
+        }
+    } else {
+        lo.source = strdup("/");
+    }
+    /* Without an explicit timeout, derive one from the cache mode */
+    if (!lo.timeout_set) {
+        switch (lo.cache) {
+        case CACHE_NONE:
+            lo.timeout = 0.0;
+            break;
+
+        case CACHE_AUTO:
+            lo.timeout = 1.0;
+            break;
+
+        case CACHE_ALWAYS:
+            lo.timeout = 86400.0;
+            break;
+        }
+    } else if (lo.timeout < 0) {
+        fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout);
+        exit(1);
+    }
+
+    se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
+    if (se == NULL) {
+        goto err_out1;
+    }
+
+    if (fuse_set_signal_handlers(se) != 0) {
+        goto err_out2;
+    }
+
+    if (fuse_session_mount(se) != 0) {
+        goto err_out3;
+    }
+
+    fuse_daemonize(opts.foreground);
+
+    setup_nofile_rlimit();
+
+    /* Must be before sandbox since it wants /proc */
+    setup_capng();
+
+    setup_sandbox(&lo, se, opts.syslog);
+
+    /* Runs after the sandbox so that "/" is the shared directory */
+    setup_root(&lo, &lo.root);
+    /* Block until ctrl+c or fusermount -u */
+    ret = virtio_loop(se);
+
+    /* Tear down in reverse order; the labels unwind partial set-up */
+    fuse_session_unmount(se);
+    cleanup_capng();
+err_out3:
+    fuse_remove_signal_handlers(se);
+err_out2:
+    fuse_session_destroy(se);
+err_out1:
+    fuse_opt_free_args(&args);
+
+    fuse_lo_data_cleanup(&lo);
+
+    return ret ? 1 : 0;
+}
diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
new file mode 100644
index 0000000000..2d9d4a7ec0
--- /dev/null
+++ b/tools/virtiofsd/seccomp.c
@@ -0,0 +1,165 @@
+/*
+ * Seccomp sandboxing for virtiofsd
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "seccomp.h"
+#include "fuse_i.h"
+#include "fuse_log.h"
+#include <errno.h>
+#include <glib.h>
+#include <seccomp.h>
+#include <stdlib.h>
+
+/*
+ * Bodge for libseccomp 2.4.2 which broke ppoll.
+ *
+ * If the libseccomp headers provide __SNR_brk but no __SNR_ppoll, define the
+ * latter here: from the kernel's __NR_ppoll when that exists, otherwise from
+ * __PNR_ppoll.  Presumably SCMP_SYS(ppoll) in the whitelist below expands to
+ * __SNR_ppoll on such versions -- confirm against the libseccomp headers.
+ */
+#if !defined(__SNR_ppoll) && defined(__SNR_brk)
+#ifdef __NR_ppoll
+#define __SNR_ppoll __NR_ppoll
+#else
+#define __SNR_ppoll __PNR_ppoll
+#endif
+#endif
+
+/*
+ * System calls that are always allowed once the seccomp filter is loaded.
+ * setup_seccomp() installs an SCMP_ACT_ALLOW rule for every entry; the
+ * filter's default action applies to everything that is not listed.
+ * Entries guarded by #ifdef only exist on some architectures or kernels.
+ */
+static const int syscall_whitelist[] = {
+    /* TODO ireg sem*() syscalls */
+    SCMP_SYS(brk),
+    SCMP_SYS(capget), /* For CAP_FSETID */
+    SCMP_SYS(capset),
+    SCMP_SYS(clock_gettime),
+    SCMP_SYS(clone),
+#ifdef __NR_clone3
+    SCMP_SYS(clone3),
+#endif
+    SCMP_SYS(close),
+    SCMP_SYS(copy_file_range),
+    SCMP_SYS(dup),
+    SCMP_SYS(eventfd2),
+    SCMP_SYS(exit),
+    SCMP_SYS(exit_group),
+    SCMP_SYS(fallocate),
+    SCMP_SYS(fchmodat),
+    SCMP_SYS(fchownat),
+    SCMP_SYS(fcntl),
+    SCMP_SYS(fdatasync),
+    SCMP_SYS(fgetxattr),
+    SCMP_SYS(flistxattr),
+    SCMP_SYS(flock),
+    SCMP_SYS(fremovexattr),
+    SCMP_SYS(fsetxattr),
+    SCMP_SYS(fstat),
+    SCMP_SYS(fstatfs),
+    SCMP_SYS(fsync),
+    SCMP_SYS(ftruncate),
+    SCMP_SYS(futex),
+    SCMP_SYS(getdents),
+    SCMP_SYS(getdents64),
+    SCMP_SYS(getegid),
+    SCMP_SYS(geteuid),
+    SCMP_SYS(getpid),
+    SCMP_SYS(gettid),
+    SCMP_SYS(gettimeofday),
+    SCMP_SYS(linkat),
+    SCMP_SYS(lseek),
+    SCMP_SYS(madvise),
+    SCMP_SYS(mkdirat),
+    SCMP_SYS(mknodat),
+    SCMP_SYS(mmap),
+    SCMP_SYS(mprotect),
+    SCMP_SYS(mremap),
+    SCMP_SYS(munmap),
+    SCMP_SYS(newfstatat),
+    SCMP_SYS(open),
+    SCMP_SYS(openat),
+    SCMP_SYS(ppoll),
+    SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
+    SCMP_SYS(preadv),
+    SCMP_SYS(pread64),
+    SCMP_SYS(pwritev),
+    SCMP_SYS(pwrite64),
+    SCMP_SYS(read),
+    SCMP_SYS(readlinkat),
+    SCMP_SYS(recvmsg),
+    SCMP_SYS(renameat),
+    SCMP_SYS(renameat2),
+    SCMP_SYS(rt_sigaction),
+    SCMP_SYS(rt_sigprocmask),
+    SCMP_SYS(rt_sigreturn),
+    SCMP_SYS(sendmsg),
+    SCMP_SYS(setresgid),
+    SCMP_SYS(setresuid),
+#ifdef __NR_setresgid32
+    SCMP_SYS(setresgid32),
+#endif
+#ifdef __NR_setresuid32
+    SCMP_SYS(setresuid32),
+#endif
+    SCMP_SYS(set_robust_list),
+    SCMP_SYS(symlinkat),
+    SCMP_SYS(time), /* Rarely needed, except on static builds */
+    SCMP_SYS(tgkill),
+    SCMP_SYS(unlinkat),
+    SCMP_SYS(utimensat),
+    SCMP_SYS(write),
+    SCMP_SYS(writev),
+};
+
+/*
+ * Syscalls used when --syslog is enabled.  setup_seccomp() adds these on top
+ * of syscall_whitelist only when it is called with enable_syslog set.
+ */
+static const int syscall_whitelist_syslog[] = {
+    SCMP_SYS(sendto),
+};
+
+/*
+ * Add an SCMP_ACT_ALLOW rule to @ctx for each of the @len syscall numbers in
+ * @syscalls.  Logs the offending syscall number and exits with status 1 if a
+ * rule cannot be added.
+ */
+static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
+{
+    const int *nr = syscalls;
+    const int *end = syscalls + len;
+
+    for (; nr != end; nr++) {
+        if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, *nr, 0) == 0) {
+            continue;
+        }
+
+        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", *nr);
+        exit(1);
+    }
+}
+
+/*
+ * Build and load the seccomp filter for this process.
+ *
+ * The default action for syscalls without a rule is SCMP_ACT_KILL_PROCESS
+ * when libseccomp offers it (falling back to SCMP_ACT_TRAP if initialising
+ * with it fails with EOPNOTSUPP), otherwise SCMP_ACT_TRAP.  Everything in
+ * syscall_whitelist is allowed, plus syscall_whitelist_syslog when
+ * @enable_syslog is true; userfaultfd is answered with ENOSYS.  Any
+ * libseccomp failure terminates the process with exit status 1.
+ */
+void setup_seccomp(bool enable_syslog)
+{
+    scmp_filter_ctx ctx;
+
+#ifdef SCMP_ACT_KILL_PROCESS
+    ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
+    /* Handle a newer libseccomp but an older kernel */
+    if (!ctx && errno == EOPNOTSUPP) {
+        ctx = seccomp_init(SCMP_ACT_TRAP);
+    }
+#else
+    ctx = seccomp_init(SCMP_ACT_TRAP);
+#endif
+    if (!ctx) {
+        fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
+        exit(1);
+    }
+
+    add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist));
+    if (enable_syslog) {
+        add_whitelist(ctx, syscall_whitelist_syslog,
+                      G_N_ELEMENTS(syscall_whitelist_syslog));
+    }
+
+    /* libvhost-user calls this for post-copy migration, we don't need it */
+    if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
+                         SCMP_SYS(userfaultfd), 0) != 0) {
+        fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
+        exit(1);
+    }
+
+    if (seccomp_load(ctx) < 0) {
+        fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
+        exit(1);
+    }
+
+    /* The filter context is released once seccomp_load() has succeeded */
+    seccomp_release(ctx);
+}
diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
new file mode 100644
index 0000000000..d47c8eade6
--- /dev/null
+++ b/tools/virtiofsd/seccomp.h
@@ -0,0 +1,16 @@
+/*
+ * Seccomp sandboxing for virtiofsd
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VIRTIOFSD_SECCOMP_H
+#define VIRTIOFSD_SECCOMP_H
+
+#include <stdbool.h>
+
+void setup_seccomp(bool enable_syslog);
+
+#endif /* VIRTIOFSD_SECCOMP_H */