summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- block.c 8
-rw-r--r-- block/sheepdog.c 315
-rw-r--r-- gdbstub.c 5
-rw-r--r-- hw/dataplane/Makefile.objs 2
-rw-r--r-- hw/dataplane/event-poll.c 100
-rw-r--r-- hw/dataplane/event-poll.h 40
-rw-r--r-- hw/dataplane/virtio-blk.c 48
-rw-r--r-- hw/macio.c 2
-rw-r--r-- hw/virtio-blk.c 4
-rw-r--r-- include/qemu/sockets.h 1
-rw-r--r-- qemu-char.c 6
-rw-r--r-- qemu-doc.texi 22
-rw-r--r-- qemu-options.hx 18
-rw-r--r-- slirp/tcp_subr.c 139
-rw-r--r-- util/osdep.c 6
15 files changed, 328 insertions, 388 deletions
diff --git a/block.c b/block.c
index 4582961965..124a9ebf65 100644
--- a/block.c
+++ b/block.c
@@ -1640,9 +1640,11 @@ int bdrv_commit_all(void)
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        int ret = bdrv_commit(bs);
-        if (ret < 0) {
-            return ret;
+        if (bs->drv && bs->backing_hd) {
+            int ret = bdrv_commit(bs);
+            if (ret < 0) {
+                return ret;
+            }
         }
     }
     return 0;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index d466b232d7..c711c28613 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -13,6 +13,7 @@
  */
 
 #include "qemu-common.h"
+#include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
@@ -21,7 +22,7 @@
 #define SD_PROTO_VER 0x01
 
 #define SD_DEFAULT_ADDR "localhost"
-#define SD_DEFAULT_PORT "7000"
+#define SD_DEFAULT_PORT 7000
 
 #define SD_OP_CREATE_AND_WRITE_OBJ  0x01
 #define SD_OP_READ_OBJ       0x02
@@ -297,8 +298,8 @@ typedef struct BDRVSheepdogState {
     bool is_snapshot;
     uint32_t cache_flags;
 
-    char *addr;
-    char *port;
+    char *host_spec;
+    bool is_unix;
     int fd;
 
     CoMutex lock;
@@ -446,56 +447,29 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     return acb;
 }
 
-static int connect_to_sdog(const char *addr, const char *port)
+static int connect_to_sdog(BDRVSheepdogState *s)
 {
-    char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
-    int fd, ret;
-    struct addrinfo hints, *res, *res0;
-
-    if (!addr) {
-        addr = SD_DEFAULT_ADDR;
-        port = SD_DEFAULT_PORT;
-    }
-
-    memset(&hints, 0, sizeof(hints));
-    hints.ai_socktype = SOCK_STREAM;
-
-    ret = getaddrinfo(addr, port, &hints, &res0);
-    if (ret) {
-        error_report("unable to get address info %s, %s",
-                     addr, strerror(errno));
-        return -errno;
-    }
-
-    for (res = res0; res; res = res->ai_next) {
-        ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
-                          sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
-        if (ret) {
-            continue;
-        }
+    int fd;
+    Error *err = NULL;
 
-        fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
-        if (fd < 0) {
-            continue;
-        }
+    if (s->is_unix) {
+        fd = unix_connect(s->host_spec, &err);
+    } else {
+        fd = inet_connect(s->host_spec, &err);
 
-    reconnect:
-        ret = connect(fd, res->ai_addr, res->ai_addrlen);
-        if (ret < 0) {
-            if (errno == EINTR) {
-                goto reconnect;
+        if (err == NULL) {
+            int ret = socket_set_nodelay(fd);
+            if (ret < 0) {
+                error_report("%s", strerror(errno));
             }
-            close(fd);
-            break;
         }
+    }
 
-        dprintf("connected to %s:%s\n", addr, port);
-        goto success;
+    if (err != NULL) {
+        qerror_report_err(err);
+        error_free(err);
     }
-    fd = -errno;
-    error_report("failed connect to %s:%s", addr, port);
-success:
-    freeaddrinfo(res0);
+
     return fd;
 }
 
@@ -787,15 +761,6 @@ static int aio_flush_request(void *opaque)
         !QLIST_EMPTY(&s->pending_aio_head);
 }
 
-static int set_nodelay(int fd)
-{
-    int ret, opt;
-
-    opt = 1;
-    ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
-    return ret;
-}
-
 /*
  * Return a socket discriptor to read/write objects.
  *
@@ -804,29 +769,88 @@ static int set_nodelay(int fd)
  */
 static int get_sheep_fd(BDRVSheepdogState *s)
 {
-    int ret, fd;
+    int fd;
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
-        error_report("%s", strerror(errno));
         return fd;
     }
 
     socket_set_nonblock(fd);
 
-    ret = set_nodelay(fd);
-    if (ret) {
-        error_report("%s", strerror(errno));
-        closesocket(fd);
-        return -errno;
-    }
-
     qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
     return fd;
 }
 
+static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
+                        char *vdi, uint32_t *snapid, char *tag)
+{
+    URI *uri;
+    QueryParams *qp = NULL;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    /* transport */
+    if (!strcmp(uri->scheme, "sheepdog")) {
+        s->is_unix = false;
+    } else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
+        s->is_unix = false;
+    } else if (!strcmp(uri->scheme, "sheepdog+unix")) {
+        s->is_unix = true;
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (uri->path == NULL || !strcmp(uri->path, "/")) {
+        ret = -EINVAL;
+        goto out;
+    }
+    pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);
+
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (s->is_unix) {
+        /* sheepdog+unix:///vdiname?socket=path */
+        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
+            ret = -EINVAL;
+            goto out;
+        }
+        s->host_spec = g_strdup(qp->p[0].value);
+    } else {
+        /* sheepdog[+tcp]://[host:port]/vdiname */
+        s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
+                                       uri->port ?: SD_DEFAULT_PORT);
+    }
+
+    /* snapshot tag */
+    if (uri->fragment) {
+        *snapid = strtoul(uri->fragment, NULL, 10);
+        if (*snapid == 0) {
+            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
+        }
+    } else {
+        *snapid = CURRENT_VDI_ID; /* search current vdi */
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
+}
+
 /*
- * Parse a filename
+ * Parse a filename (old syntax)
  *
  * filename must be one of the following formats:
  *   1. [vdiname]
@@ -845,9 +869,11 @@ static int get_sheep_fd(BDRVSheepdogState *s)
 static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
                          char *vdi, uint32_t *snapid, char *tag)
 {
-    char *p, *q;
-    int nr_sep;
+    char *p, *q, *uri;
+    const char *host_spec, *vdi_spec;
+    int nr_sep, ret;
 
+    strstart(filename, "sheepdog:", (const char **)&filename);
     p = q = g_strdup(filename);
 
     /* count the number of separators */
@@ -860,38 +886,32 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
     }
     p = q;
 
-    /* use the first two tokens as hostname and port number. */
+    /* use the first two tokens as host_spec. */
     if (nr_sep >= 2) {
-        s->addr = p;
+        host_spec = p;
         p = strchr(p, ':');
-        *p++ = '\0';
-
-        s->port = p;
+        p++;
         p = strchr(p, ':');
         *p++ = '\0';
     } else {
-        s->addr = NULL;
-        s->port = 0;
+        host_spec = "";
     }
 
-    pstrcpy(vdi, SD_MAX_VDI_LEN, p);
+    vdi_spec = p;
 
-    p = strchr(vdi, ':');
+    p = strchr(vdi_spec, ':');
     if (p) {
-        *p++ = '\0';
-        *snapid = strtoul(p, NULL, 10);
-        if (*snapid == 0) {
-            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
-        }
-    } else {
-        *snapid = CURRENT_VDI_ID; /* search current vdi */
+        *p++ = '#';
     }
 
-    if (s->addr == NULL) {
-        g_free(q);
-    }
+    uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
 
-    return 0;
+    ret = sd_parse_uri(s, uri, vdi, snapid, tag);
+
+    g_free(q);
+    g_free(uri);
+
+    return ret;
 }
 
 static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
@@ -903,7 +923,7 @@ static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
     unsigned int wlen, rlen = 0;
     char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         return fd;
     }
@@ -1106,16 +1126,19 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
     uint32_t snapid;
     char *buf = NULL;
 
-    strstart(filename, "sheepdog:", (const char **)&filename);
-
     QLIST_INIT(&s->inflight_aio_head);
     QLIST_INIT(&s->pending_aio_head);
     s->fd = -1;
 
     memset(vdi, 0, sizeof(vdi));
     memset(tag, 0, sizeof(tag));
-    if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
-        ret = -EINVAL;
+
+    if (strstr(filename, "://")) {
+        ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
+    } else {
+        ret = parse_vdiname(s, filename, vdi, &snapid, tag);
+    }
+    if (ret < 0) {
         goto out;
     }
     s->fd = get_sheep_fd(s);
@@ -1143,9 +1166,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
         s->is_snapshot = true;
     }
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
-        error_report("failed to connect");
         ret = fd;
         goto out;
     }
@@ -1178,9 +1200,8 @@ out:
     return ret;
 }
 
-static int do_sd_create(char *filename, int64_t vdi_size,
-                        uint32_t base_vid, uint32_t *vdi_id, int snapshot,
-                        const char *addr, const char *port)
+static int do_sd_create(BDRVSheepdogState *s, char *filename, int64_t vdi_size,
+                        uint32_t base_vid, uint32_t *vdi_id, int snapshot)
 {
     SheepdogVdiReq hdr;
     SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1188,7 +1209,7 @@ static int do_sd_create(char *filename, int64_t vdi_size,
     unsigned int wlen, rlen = 0;
     char buf[SD_MAX_VDI_LEN];
 
-    fd = connect_to_sdog(addr, port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         return fd;
     }
@@ -1284,17 +1305,17 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
     char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
     uint32_t snapid;
     bool prealloc = false;
-    const char *vdiname;
 
     s = g_malloc0(sizeof(BDRVSheepdogState));
 
-    strstart(filename, "sheepdog:", &vdiname);
-
     memset(vdi, 0, sizeof(vdi));
     memset(tag, 0, sizeof(tag));
-    if (parse_vdiname(s, vdiname, vdi, &snapid, tag) < 0) {
-        error_report("invalid filename");
-        ret = -EINVAL;
+    if (strstr(filename, "://")) {
+        ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
+    } else {
+        ret = parse_vdiname(s, filename, vdi, &snapid, tag);
+    }
+    if (ret < 0) {
         goto out;
     }
 
@@ -1355,7 +1376,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
         bdrv_delete(bs);
     }
 
-    ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port);
+    ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
     if (!prealloc || ret) {
         goto out;
     }
@@ -1376,7 +1397,7 @@ static void sd_close(BlockDriverState *bs)
 
     dprintf("%s\n", s->name);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         return;
     }
@@ -1400,7 +1421,7 @@ static void sd_close(BlockDriverState *bs)
 
     qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
     closesocket(s->fd);
-    g_free(s->addr);
+    g_free(s->host_spec);
 }
 
 static int64_t sd_getlength(BlockDriverState *bs)
@@ -1424,7 +1445,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
         return -EINVAL;
     }
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         return fd;
     }
@@ -1500,17 +1521,15 @@ static int sd_create_branch(BDRVSheepdogState *s)
 
     buf = g_malloc(SD_INODE_SIZE);
 
-    ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1,
-                       s->addr, s->port);
+    ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1);
     if (ret) {
         goto out;
     }
 
     dprintf("%" PRIx32 " is created.\n", vid);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
-        error_report("failed to connect");
         ret = fd;
         goto out;
     }
@@ -1769,7 +1788,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
 
     /* refresh inode. */
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         ret = fd;
         goto cleanup;
@@ -1782,8 +1801,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto cleanup;
     }
 
-    ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1,
-                       s->addr, s->port);
+    ret = do_sd_create(s, s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid,
+                       1);
     if (ret < 0) {
         error_report("failed to create inode for snapshot. %s",
                      strerror(errno));
@@ -1838,9 +1857,8 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto out;
     }
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
-        error_report("failed to connect");
         ret = fd;
         goto out;
     }
@@ -1902,7 +1920,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 
     vdi_inuse = g_malloc(max);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         ret = fd;
         goto out;
@@ -1929,9 +1947,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
     hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
     start_nr = hval & (SD_NR_VDIS - 1);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
-        error_report("failed to connect");
         ret = fd;
         goto out;
     }
@@ -1988,7 +2005,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
     uint32_t vdi_index;
     uint64_t offset;
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s);
     if (fd < 0) {
         return fd;
     }
@@ -2063,7 +2080,7 @@ static QEMUOptionParameter sd_create_options[] = {
     { NULL }
 };
 
-BlockDriver bdrv_sheepdog = {
+static BlockDriver bdrv_sheepdog = {
     .format_name    = "sheepdog",
     .protocol_name  = "sheepdog",
     .instance_size  = sizeof(BDRVSheepdogState),
@@ -2088,8 +2105,60 @@ BlockDriver bdrv_sheepdog = {
     .create_options = sd_create_options,
 };
 
+static BlockDriver bdrv_sheepdog_tcp = {
+    .format_name    = "sheepdog",
+    .protocol_name  = "sheepdog+tcp",
+    .instance_size  = sizeof(BDRVSheepdogState),
+    .bdrv_file_open = sd_open,
+    .bdrv_close     = sd_close,
+    .bdrv_create    = sd_create,
+    .bdrv_getlength = sd_getlength,
+    .bdrv_truncate  = sd_truncate,
+
+    .bdrv_co_readv  = sd_co_readv,
+    .bdrv_co_writev = sd_co_writev,
+    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+
+    .bdrv_snapshot_create   = sd_snapshot_create,
+    .bdrv_snapshot_goto     = sd_snapshot_goto,
+    .bdrv_snapshot_delete   = sd_snapshot_delete,
+    .bdrv_snapshot_list     = sd_snapshot_list,
+
+    .bdrv_save_vmstate  = sd_save_vmstate,
+    .bdrv_load_vmstate  = sd_load_vmstate,
+
+    .create_options = sd_create_options,
+};
+
+static BlockDriver bdrv_sheepdog_unix = {
+    .format_name    = "sheepdog",
+    .protocol_name  = "sheepdog+unix",
+    .instance_size  = sizeof(BDRVSheepdogState),
+    .bdrv_file_open = sd_open,
+    .bdrv_close     = sd_close,
+    .bdrv_create    = sd_create,
+    .bdrv_getlength = sd_getlength,
+    .bdrv_truncate  = sd_truncate,
+
+    .bdrv_co_readv  = sd_co_readv,
+    .bdrv_co_writev = sd_co_writev,
+    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+
+    .bdrv_snapshot_create   = sd_snapshot_create,
+    .bdrv_snapshot_goto     = sd_snapshot_goto,
+    .bdrv_snapshot_delete   = sd_snapshot_delete,
+    .bdrv_snapshot_list     = sd_snapshot_list,
+
+    .bdrv_save_vmstate  = sd_save_vmstate,
+    .bdrv_load_vmstate  = sd_load_vmstate,
+
+    .create_options = sd_create_options,
+};
+
 static void bdrv_sheepdog_init(void)
 {
     bdrv_register(&bdrv_sheepdog);
+    bdrv_register(&bdrv_sheepdog_tcp);
+    bdrv_register(&bdrv_sheepdog_unix);
 }
 block_init(bdrv_sheepdog_init);
diff --git a/gdbstub.c b/gdbstub.c
index 32dfea9ed0..e414ad9157 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2841,7 +2841,7 @@ static void gdb_accept(void)
     GDBState *s;
     struct sockaddr_in sockaddr;
     socklen_t len;
-    int val, fd;
+    int fd;
 
     for(;;) {
         len = sizeof(sockaddr);
@@ -2858,8 +2858,7 @@ static void gdb_accept(void)
     }
 
     /* set short latency */
-    val = 1;
-    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
+    socket_set_nodelay(fd);
 
     s = g_malloc0(sizeof(GDBState));
     s->c_cpu = first_cpu;
diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
index 3e47d0537e..701111ccb9 100644
--- a/hw/dataplane/Makefile.objs
+++ b/hw/dataplane/Makefile.objs
@@ -1 +1 @@
-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o
+obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o
diff --git a/hw/dataplane/event-poll.c b/hw/dataplane/event-poll.c
deleted file mode 100644
index 2b55c6e255..0000000000
--- a/hw/dataplane/event-poll.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Event loop with file descriptor polling
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include <sys/epoll.h>
-#include "hw/dataplane/event-poll.h"
-
-/* Add an event notifier and its callback for polling */
-void event_poll_add(EventPoll *poll, EventHandler *handler,
-                    EventNotifier *notifier, EventCallback *callback)
-{
-    struct epoll_event event = {
-        .events = EPOLLIN,
-        .data.ptr = handler,
-    };
-    handler->notifier = notifier;
-    handler->callback = callback;
-    if (epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD,
-                  event_notifier_get_fd(notifier), &event) != 0) {
-        fprintf(stderr, "failed to add event handler to epoll: %m\n");
-        exit(1);
-    }
-}
-
-/* Event callback for stopping event_poll() */
-static void handle_stop(EventHandler *handler)
-{
-    /* Do nothing */
-}
-
-void event_poll_init(EventPoll *poll)
-{
-    /* Create epoll file descriptor */
-    poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
-    if (poll->epoll_fd < 0) {
-        fprintf(stderr, "epoll_create1 failed: %m\n");
-        exit(1);
-    }
-
-    /* Set up stop notifier */
-    if (event_notifier_init(&poll->stop_notifier, 0) < 0) {
-        fprintf(stderr, "failed to init stop notifier\n");
-        exit(1);
-    }
-    event_poll_add(poll, &poll->stop_handler,
-                   &poll->stop_notifier, handle_stop);
-}
-
-void event_poll_cleanup(EventPoll *poll)
-{
-    event_notifier_cleanup(&poll->stop_notifier);
-    close(poll->epoll_fd);
-    poll->epoll_fd = -1;
-}
-
-/* Block until the next event and invoke its callback */
-void event_poll(EventPoll *poll)
-{
-    EventHandler *handler;
-    struct epoll_event event;
-    int nevents;
-
-    /* Wait for the next event.  Only do one event per call to keep the
-     * function simple, this could be changed later. */
-    do {
-        nevents = epoll_wait(poll->epoll_fd, &event, 1, -1);
-    } while (nevents < 0 && errno == EINTR);
-    if (unlikely(nevents != 1)) {
-        fprintf(stderr, "epoll_wait failed: %m\n");
-        exit(1); /* should never happen */
-    }
-
-    /* Find out which event handler has become active */
-    handler = event.data.ptr;
-
-    /* Clear the eventfd */
-    event_notifier_test_and_clear(handler->notifier);
-
-    /* Handle the event */
-    handler->callback(handler);
-}
-
-/* Stop event_poll()
- *
- * This function can be used from another thread.
- */
-void event_poll_notify(EventPoll *poll)
-{
-    event_notifier_set(&poll->stop_notifier);
-}
diff --git a/hw/dataplane/event-poll.h b/hw/dataplane/event-poll.h
deleted file mode 100644
index 3e8d3ec7d5..0000000000
--- a/hw/dataplane/event-poll.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Event loop with file descriptor polling
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- *   Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef EVENT_POLL_H
-#define EVENT_POLL_H
-
-#include "qemu/event_notifier.h"
-
-typedef struct EventHandler EventHandler;
-typedef void EventCallback(EventHandler *handler);
-struct EventHandler {
-    EventNotifier *notifier;        /* eventfd */
-    EventCallback *callback;        /* callback function */
-};
-
-typedef struct {
-    int epoll_fd;                   /* epoll(2) file descriptor */
-    EventNotifier stop_notifier;    /* stop poll notifier */
-    EventHandler stop_handler;      /* stop poll handler */
-} EventPoll;
-
-void event_poll_add(EventPoll *poll, EventHandler *handler,
-                    EventNotifier *notifier, EventCallback *callback);
-void event_poll_init(EventPoll *poll);
-void event_poll_cleanup(EventPoll *poll);
-void event_poll(EventPoll *poll);
-void event_poll_notify(EventPoll *poll);
-
-#endif /* EVENT_POLL_H */
diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
index 3f2da22669..aa9b04078b 100644
--- a/hw/dataplane/virtio-blk.c
+++ b/hw/dataplane/virtio-blk.c
@@ -14,13 +14,13 @@
 
 #include "trace.h"
 #include "qemu/iov.h"
-#include "event-poll.h"
 #include "qemu/thread.h"
 #include "vring.h"
 #include "ioq.h"
 #include "migration/migration.h"
 #include "hw/virtio-blk.h"
 #include "hw/dataplane/virtio-blk.h"
+#include "block/aio.h"
 
 enum {
     SEG_MAX = 126,                  /* maximum number of I/O segments */
@@ -51,9 +51,14 @@ struct VirtIOBlockDataPlane {
     Vring vring;                    /* virtqueue vring */
     EventNotifier *guest_notifier;  /* irq */
 
-    EventPoll event_poll;           /* event poller */
-    EventHandler io_handler;        /* Linux AIO completion handler */
-    EventHandler notify_handler;    /* virtqueue notify handler */
+    /* Note that these EventNotifiers are assigned by value.  This is
+     * fine as long as you do not call event_notifier_cleanup on them
+     * (because you don't own the file descriptor or handle; you just
+     * use it).
+     */
+    AioContext *ctx;
+    EventNotifier io_notifier;      /* Linux AIO completion */
+    EventNotifier host_notifier;    /* doorbell */
 
     IOQueue ioqueue;                /* Linux AIO queue (should really be per
                                        dataplane thread) */
@@ -256,10 +261,10 @@ static int process_request(IOQueue *ioq, struct iovec iov[],
     }
 }
 
-static void handle_notify(EventHandler *handler)
+static void handle_notify(EventNotifier *e)
 {
-    VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane,
-                                           notify_handler);
+    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
+                                           host_notifier);
 
     /* There is one array of iovecs into which all new requests are extracted
      * from the vring.  Requests are read from the vring and the translated
@@ -286,6 +291,7 @@ static void handle_notify(EventHandler *handler)
     unsigned int out_num = 0, in_num = 0;
     unsigned int num_queued;
 
+    event_notifier_test_and_clear(&s->host_notifier);
     for (;;) {
         /* Disable guest->host notifies to avoid unnecessary vmexits */
         vring_disable_notification(s->vdev, &s->vring);
@@ -334,11 +340,12 @@ static void handle_notify(EventHandler *handler)
     }
 }
 
-static void handle_io(EventHandler *handler)
+static void handle_io(EventNotifier *e)
 {
-    VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane,
-                                           io_handler);
+    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
+                                           io_notifier);
 
+    event_notifier_test_and_clear(&s->io_notifier);
     if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
         notify_guest(s);
     }
@@ -348,7 +355,7 @@ static void handle_io(EventHandler *handler)
      * requests.
      */
     if (unlikely(vring_more_avail(&s->vring))) {
-        handle_notify(&s->notify_handler);
+        handle_notify(&s->host_notifier);
     }
 }
 
@@ -357,7 +364,7 @@ static void *data_plane_thread(void *opaque)
     VirtIOBlockDataPlane *s = opaque;
 
     do {
-        event_poll(&s->event_poll);
+        aio_poll(s->ctx, true);
     } while (!s->stopping || s->num_reqs > 0);
     return NULL;
 }
@@ -445,7 +452,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
         return;
     }
 
-    event_poll_init(&s->event_poll);
+    s->ctx = aio_context_new();
 
     /* Set up guest notifier (irq) */
     if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1,
@@ -462,17 +469,16 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
         fprintf(stderr, "virtio-blk failed to set host notifier\n");
         exit(1);
     }
-    event_poll_add(&s->event_poll, &s->notify_handler,
-                   virtio_queue_get_host_notifier(vq),
-                   handle_notify);
+    s->host_notifier = *virtio_queue_get_host_notifier(vq);
+    aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, NULL);
 
     /* Set up ioqueue */
     ioq_init(&s->ioqueue, s->fd, REQ_MAX);
     for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
         ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
     }
-    event_poll_add(&s->event_poll, &s->io_handler,
-                   ioq_get_notifier(&s->ioqueue), handle_io);
+    s->io_notifier = *ioq_get_notifier(&s->ioqueue);
+    aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, NULL);
 
     s->started = true;
     trace_virtio_blk_data_plane_start(s);
@@ -498,15 +504,17 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
         qemu_bh_delete(s->start_bh);
         s->start_bh = NULL;
     } else {
-        event_poll_notify(&s->event_poll);
+        aio_notify(s->ctx);
         qemu_thread_join(&s->thread);
     }
 
+    aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL);
     ioq_cleanup(&s->ioqueue);
 
+    aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL);
     s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false);
 
-    event_poll_cleanup(&s->event_poll);
+    aio_context_unref(s->ctx);
 
     /* Clean up guest notifier (irq) */
     s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false);
diff --git a/hw/macio.c b/hw/macio.c
index 74bdcd1039..0c6a6b8e7a 100644
--- a/hw/macio.c
+++ b/hw/macio.c
@@ -188,7 +188,7 @@ static int macio_newworld_initfn(PCIDevice *d)
     sysbus_dev = SYS_BUS_DEVICE(&ns->ide[1]);
     sysbus_connect_irq(sysbus_dev, 0, ns->irqs[3]);
     sysbus_connect_irq(sysbus_dev, 1, ns->irqs[4]);
-    macio_ide_register_dma(&ns->ide[0], s->dbdma, 0x1a);
+    macio_ide_register_dma(&ns->ide[1], s->dbdma, 0x1a);
     ret = qdev_init(DEVICE(&ns->ide[1]));
     if (ret < 0) {
         return ret;
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 34913ee40e..f5e6ee90b6 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -36,6 +36,7 @@ typedef struct VirtIOBlock
     VirtIOBlkConf *blk;
     unsigned short sector_mask;
     DeviceState *qdev;
+    VMChangeStateEntry *change;
 #ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
     VirtIOBlockDataPlane *dataplane;
 #endif
@@ -681,7 +682,7 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
     }
 #endif
 
-    qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
+    s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
     s->qdev = dev;
     register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
                     virtio_blk_save, virtio_blk_load, s);
@@ -702,6 +703,7 @@ void virtio_blk_exit(VirtIODevice *vdev)
     virtio_blk_data_plane_destroy(s->dataplane);
     s->dataplane = NULL;
 #endif
+    qemu_del_vm_change_state_handler(s->change);
     unregister_savevm(s->qdev, "virtio-blk", s);
     blockdev_mark_auto_del(s->bs);
     virtio_cleanup(vdev);
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 803ae1798c..6125bf7bdf 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -34,6 +34,7 @@ int inet_aton(const char *cp, struct in_addr *ia);
 int qemu_socket(int domain, int type, int protocol);
 int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
 int socket_set_cork(int fd, int v);
+int socket_set_nodelay(int fd);
 void socket_set_block(int fd);
 void socket_set_nonblock(int fd);
 int send_all(int fd, const void *buf, int len1);
diff --git a/qemu-char.c b/qemu-char.c
index 160decc2f0..36295b1bcd 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2365,12 +2365,6 @@ static void tcp_chr_telnet_init(int fd)
     send(fd, (char *)buf, 3, 0);
 }
 
-static void socket_set_nodelay(int fd)
-{
-    int val = 1;
-    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val, sizeof(val));
-}
-
 static int tcp_chr_add_client(CharDriverState *chr, int fd)
 {
     TCPCharDriver *s = chr->opaque;
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 747e052fcb..af84bef0e9 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -830,7 +830,7 @@ QEMU-based virtual machines.
 
 You can create a Sheepdog disk image with the command:
 @example
-qemu-img create sheepdog:@var{image} @var{size}
+qemu-img create sheepdog:///@var{image} @var{size}
 @end example
 where @var{image} is the Sheepdog image name and @var{size} is its
 size.
@@ -838,38 +838,44 @@ size.
 To import the existing @var{filename} to Sheepdog, you can use a
 convert command.
 @example
-qemu-img convert @var{filename} sheepdog:@var{image}
+qemu-img convert @var{filename} sheepdog:///@var{image}
 @end example
 
 You can boot from the Sheepdog disk image with the command:
 @example
-qemu-system-i386 sheepdog:@var{image}
+qemu-system-i386 sheepdog:///@var{image}
 @end example
 
 You can also create a snapshot of the Sheepdog image like qcow2.
 @example
-qemu-img snapshot -c @var{tag} sheepdog:@var{image}
+qemu-img snapshot -c @var{tag} sheepdog:///@var{image}
 @end example
 where @var{tag} is a tag name of the newly created snapshot.
 
 To boot from the Sheepdog snapshot, specify the tag name of the
 snapshot.
 @example
-qemu-system-i386 sheepdog:@var{image}:@var{tag}
+qemu-system-i386 sheepdog:///@var{image}#@var{tag}
 @end example
 
 You can create a cloned image from the existing snapshot.
 @example
-qemu-img create -b sheepdog:@var{base}:@var{tag} sheepdog:@var{image}
+qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image}
 @end example
 where @var{base} is a image name of the source snapshot and @var{tag}
 is its tag name.
 
+You can use a Unix socket instead of an inet socket:
+
+@example
+qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path}
+@end example
+
 If the Sheepdog daemon doesn't run on the local host, you need to
 specify one of the Sheepdog servers to connect to.
 @example
-qemu-img create sheepdog:@var{hostname}:@var{port}:@var{image} @var{size}
-qemu-system-i386 sheepdog:@var{hostname}:@var{port}:@var{image}
+qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size}
+qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image}
 @end example
 
 @node disk_images_iscsi
diff --git a/qemu-options.hx b/qemu-options.hx
index 863069f293..6f9334a97f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2108,23 +2108,13 @@ QEMU supports using either local sheepdog devices or remote networked
 devices.
 
 Syntax for specifying a sheepdog device
-@table @list
-``sheepdog:<vdiname>''
-
-``sheepdog:<vdiname>:<snapid>''
-
-``sheepdog:<vdiname>:<tag>''
-
-``sheepdog:<host>:<port>:<vdiname>''
-
-``sheepdog:<host>:<port>:<vdiname>:<snapid>''
-
-``sheepdog:<host>:<port>:<vdiname>:<tag>''
-@end table
+@example
+sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag]
+@end example
 
 Example
 @example
-qemu-system-i386 --drive file=sheepdog:192.0.2.1:30000:MyVirtualMachine
+qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine
 @end example
 
 See also @url{http://http://www.osrg.net/sheepdog/}.
diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c
index 1542e43619..7b7ad60aea 100644
--- a/slirp/tcp_subr.c
+++ b/slirp/tcp_subr.c
@@ -384,83 +384,86 @@ int tcp_fconnect(struct socket *so)
  * the time it gets to accept(), so... We simply accept
  * here and SYN the local-host.
  */
-void
-tcp_connect(struct socket *inso)
+void tcp_connect(struct socket *inso)
 {
-	Slirp *slirp = inso->slirp;
-	struct socket *so;
-	struct sockaddr_in addr;
-	socklen_t addrlen = sizeof(struct sockaddr_in);
-	struct tcpcb *tp;
-	int s, opt;
+    Slirp *slirp = inso->slirp;
+    struct socket *so;
+    struct sockaddr_in addr;
+    socklen_t addrlen = sizeof(struct sockaddr_in);
+    struct tcpcb *tp;
+    int s, opt;
 
-	DEBUG_CALL("tcp_connect");
-	DEBUG_ARG("inso = %lx", (long)inso);
+    DEBUG_CALL("tcp_connect");
+    DEBUG_ARG("inso = %lx", (long)inso);
 
-	/*
-	 * If it's an SS_ACCEPTONCE socket, no need to socreate()
-	 * another socket, just use the accept() socket.
-	 */
-	if (inso->so_state & SS_FACCEPTONCE) {
-		/* FACCEPTONCE already have a tcpcb */
-		so = inso;
-	} else {
-		if ((so = socreate(slirp)) == NULL) {
-			/* If it failed, get rid of the pending connection */
-			closesocket(accept(inso->s,(struct sockaddr *)&addr,&addrlen));
-			return;
-		}
-		if (tcp_attach(so) < 0) {
-			free(so); /* NOT sofree */
-			return;
-		}
-		so->so_laddr = inso->so_laddr;
-		so->so_lport = inso->so_lport;
-	}
+    /*
+     * If it's an SS_ACCEPTONCE socket, no need to socreate()
+     * another socket, just use the accept() socket.
+     */
+    if (inso->so_state & SS_FACCEPTONCE) {
+        /* FACCEPTONCE already have a tcpcb */
+        so = inso;
+    } else {
+        so = socreate(slirp);
+        if (so == NULL) {
+            /* If it failed, get rid of the pending connection */
+            closesocket(accept(inso->s, (struct sockaddr *)&addr, &addrlen));
+            return;
+        }
+        if (tcp_attach(so) < 0) {
+            free(so); /* NOT sofree */
+            return;
+        }
+        so->so_laddr = inso->so_laddr;
+        so->so_lport = inso->so_lport;
+    }
 
-	(void) tcp_mss(sototcpcb(so), 0);
+    tcp_mss(sototcpcb(so), 0);
 
-	if ((s = accept(inso->s,(struct sockaddr *)&addr,&addrlen)) < 0) {
-		tcp_close(sototcpcb(so)); /* This will sofree() as well */
-		return;
-	}
-	socket_set_nonblock(s);
-	opt = 1;
-	setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int));
-	opt = 1;
-	setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
-	opt = 1;
-	setsockopt(s,IPPROTO_TCP,TCP_NODELAY,(char *)&opt,sizeof(int));
-
-	so->so_fport = addr.sin_port;
-	so->so_faddr = addr.sin_addr;
-	/* Translate connections from localhost to the real hostname */
-        if (so->so_faddr.s_addr == 0 ||
-            (so->so_faddr.s_addr & loopback_mask) ==
-            (loopback_addr.s_addr & loopback_mask)) {
-            so->so_faddr = slirp->vhost_addr;
-        }
+    s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
+    if (s < 0) {
+        tcp_close(sototcpcb(so)); /* This will sofree() as well */
+        return;
+    }
+    socket_set_nonblock(s);
+    opt = 1;
+    setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(int));
+    opt = 1;
+    setsockopt(s, SOL_SOCKET, SO_OOBINLINE, (char *)&opt, sizeof(int));
+    socket_set_nodelay(s);
+
+    so->so_fport = addr.sin_port;
+    so->so_faddr = addr.sin_addr;
+    /* Translate connections from localhost to the real hostname */
+    if (so->so_faddr.s_addr == 0 ||
+        (so->so_faddr.s_addr & loopback_mask) ==
+        (loopback_addr.s_addr & loopback_mask)) {
+        so->so_faddr = slirp->vhost_addr;
+    }
 
-	/* Close the accept() socket, set right state */
-	if (inso->so_state & SS_FACCEPTONCE) {
-		closesocket(so->s); /* If we only accept once, close the accept() socket */
-		so->so_state = SS_NOFDREF; /* Don't select it yet, even though we have an FD */
-					   /* if it's not FACCEPTONCE, it's already NOFDREF */
-	}
-	so->s = s;
-	so->so_state |= SS_INCOMING;
+    /* Close the accept() socket, set right state */
+    if (inso->so_state & SS_FACCEPTONCE) {
+        /* If we only accept once, close the accept() socket */
+        closesocket(so->s);
+
+        /* Don't select it yet, even though we have an FD */
+        /* if it's not FACCEPTONCE, it's already NOFDREF */
+        so->so_state = SS_NOFDREF;
+    }
+    so->s = s;
+    so->so_state |= SS_INCOMING;
 
-	so->so_iptos = tcp_tos(so);
-	tp = sototcpcb(so);
+    so->so_iptos = tcp_tos(so);
+    tp = sototcpcb(so);
 
-	tcp_template(tp);
+    tcp_template(tp);
 
-	tp->t_state = TCPS_SYN_SENT;
-	tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
-	tp->iss = slirp->tcp_iss;
-	slirp->tcp_iss += TCP_ISSINCR/2;
-	tcp_sendseqinit(tp);
-	tcp_output(tp);
+    tp->t_state = TCPS_SYN_SENT;
+    tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+    tp->iss = slirp->tcp_iss;
+    slirp->tcp_iss += TCP_ISSINCR/2;
+    tcp_sendseqinit(tp);
+    tcp_output(tp);
 }
 
 /*
diff --git a/util/osdep.c b/util/osdep.c
index 5b51a0322e..c4082610df 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -63,6 +63,12 @@ int socket_set_cork(int fd, int v)
 #endif
 }
 
+int socket_set_nodelay(int fd)
+{
+    int v = 1;
+    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
+}
+
 int qemu_madvise(void *addr, size_t len, int advice)
 {
     if (advice == QEMU_MADV_INVALID) {