86 files changed, 3407 insertions, 2759 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 81e7fac2fc..3c949d5a97 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -165,6 +165,7 @@ F: hw/openrisc/
 F: tests/tcg/openrisc/
 
 PowerPC
+M: David Gibson <david@gibson.dropbear.id.au>
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
 S: Maintained
@@ -597,7 +598,7 @@ F: hw/pci-host/grackle.c
 F: hw/misc/macio/
 
 PReP
-M: Andreas Färber <andreas.faerber@web.de>
+L: qemu-devel@nongnu.org
 L: qemu-ppc@nongnu.org
 S: Odd Fixes
 F: hw/ppc/prep.c
@@ -1046,7 +1047,7 @@ S: Supported
 F: scripts/coverity-model.c
 
 CPU
-M: Andreas Färber <afaerber@suse.de>
+L: qemu-devel@nongnu.org
 S: Supported
 F: qom/cpu.c
 F: include/qom/cpu.h
@@ -1105,7 +1106,6 @@ F: ui/
 F: include/ui/
 
 Cocoa graphics
-M: Andreas Färber <andreas.faerber@web.de>
 M: Peter Maydell <peter.maydell@linaro.org>
 S: Odd Fixes
 F: ui/cocoa.m
@@ -1400,9 +1400,8 @@ S: Orphan
 
 Stable 0.15
 L: qemu-stable@nongnu.org
-M: Andreas Färber <afaerber@suse.de>
 T: git git://git.qemu-project.org/qemu-stable-0.15.git
-S: Supported
+S: Orphan
 
 Stable 0.14
 L: qemu-stable@nongnu.org
diff --git a/block.c b/block.c
index 1205ef8860..736432f67e 100644
--- a/block.c
+++ b/block.c
@@ -64,16 +64,16 @@ static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
-static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
-                             const char *reference, QDict *options, int flags,
-                             BlockDriverState *parent,
-                             const BdrvChildRole *child_role, Error **errp);
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+                                           const char *reference,
+                                           QDict *options, int flags,
+                                           BlockDriverState *parent,
+                                           const BdrvChildRole *child_role,
+                                           Error **errp);
 
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
-static void bdrv_close(BlockDriverState *bs);
-
 #ifdef _WIN32
 static int is_windows_drive_prefix(const char *filename)
 {
@@ -220,11 +220,6 @@ void bdrv_register(BlockDriver *bdrv)
     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
 
-BlockDriverState *bdrv_new_root(void)
-{
-    return bdrv_new();
-}
-
 BlockDriverState *bdrv_new(void)
 {
     BlockDriverState *bs;
@@ -664,6 +659,18 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
     return 0;
 }
 
+static void bdrv_child_cb_drained_begin(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    bdrv_drained_begin(bs);
+}
+
+static void bdrv_child_cb_drained_end(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    bdrv_drained_end(bs);
+}
+
 /*
  * Returns the options and flags that a temporary snapshot should get, based on
  * the originally requested flags (the originally requested image will have
@@ -710,6 +717,8 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
 
 const BdrvChildRole child_file = {
     .inherit_options = bdrv_inherited_options,
+    .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_end     = bdrv_child_cb_drained_end,
 };
 
 /*
@@ -728,6 +737,8 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
 
 const BdrvChildRole child_format = {
     .inherit_options = bdrv_inherited_fmt_options,
+    .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_end     = bdrv_child_cb_drained_end,
 };
 
 /*
@@ -755,6 +766,8 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
 
 static const BdrvChildRole child_backing = {
     .inherit_options = bdrv_backing_options,
+    .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_end     = bdrv_child_cb_drained_end,
 };
 
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
@@ -1155,18 +1168,41 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     return 0;
 }
 
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+{
+    BlockDriverState *old_bs = child->bs;
+
+    if (old_bs) {
+        if (old_bs->quiesce_counter && child->role->drained_end) {
+            child->role->drained_end(child);
+        }
+        QLIST_REMOVE(child, next_parent);
+    }
+
+    child->bs = new_bs;
+
+    if (new_bs) {
+        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
+        if (new_bs->quiesce_counter && child->role->drained_begin) {
+            child->role->drained_begin(child);
+        }
+    }
+}
+
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
-                                  const BdrvChildRole *child_role)
+                                  const BdrvChildRole *child_role,
+                                  void *opaque)
 {
     BdrvChild *child = g_new(BdrvChild, 1);
     *child = (BdrvChild) {
-        .bs     = child_bs,
+        .bs     = NULL,
         .name   = g_strdup(child_name),
         .role   = child_role,
+        .opaque = opaque,
     };
 
-    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
+    bdrv_replace_child(child, child_bs);
 
     return child;
 }
@@ -1176,7 +1212,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              const char *child_name,
                              const BdrvChildRole *child_role)
 {
-    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
+    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
+                                              parent_bs);
     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
     return child;
 }
@@ -1187,7 +1224,9 @@ static void bdrv_detach_child(BdrvChild *child)
         QLIST_REMOVE(child, next);
         child->next.le_prev = NULL;
     }
-    QLIST_REMOVE(child, next_parent);
+
+    bdrv_replace_child(child, NULL);
+
     g_free(child->name);
     g_free(child);
 }
@@ -1341,14 +1380,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
     }
 
-    backing_hd = NULL;
-    ret = bdrv_open_inherit(&backing_hd,
-                            *backing_filename ? backing_filename : NULL,
-                            reference, options, 0, bs, &child_backing,
-                            errp);
-    if (ret < 0) {
+    backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
+                                   reference, options, 0, bs, &child_backing,
+                                   errp);
+    if (!backing_hd) {
         bs->open_flags |= BDRV_O_NO_BACKING;
         error_prepend(errp, "Could not open backing file: ");
+        ret = -EINVAL;
         goto free_exit;
     }
 
@@ -1388,7 +1426,6 @@ BdrvChild *bdrv_open_child(const char *filename,
     BdrvChild *c = NULL;
     BlockDriverState *bs;
     QDict *image_options;
-    int ret;
     char *bdref_key_dot;
     const char *reference;
 
@@ -1408,10 +1445,9 @@ BdrvChild *bdrv_open_child(const char *filename,
         goto done;
     }
 
-    bs = NULL;
-    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
-                            parent, child_role, errp);
-    if (ret < 0) {
+    bs = bdrv_open_inherit(filename, reference, image_options, 0,
+                           parent, child_role, errp);
+    if (!bs) {
         goto done;
     }
 
@@ -1422,15 +1458,16 @@ done:
     return c;
 }
 
-static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
-                                     QDict *snapshot_options, Error **errp)
+static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
+                                                   int flags,
+                                                   QDict *snapshot_options,
+                                                   Error **errp)
 {
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char *tmp_filename = g_malloc0(PATH_MAX + 1);
     int64_t total_size;
     QemuOpts *opts = NULL;
     BlockDriverState *bs_snapshot;
-    Error *local_err = NULL;
     int ret;
 
     /* if snapshot, we create a temporary backing file and open it
@@ -1439,7 +1476,6 @@ static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
     /* Get the required size from the image */
     total_size = bdrv_getlength(bs);
     if (total_size < 0) {
-        ret = total_size;
         error_setg_errno(errp, -total_size, "Could not get image size");
         goto out;
     }
@@ -1470,22 +1506,26 @@ static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
     qdict_put(snapshot_options, "driver",
               qstring_from_str("qcow2"));
 
-    bs_snapshot = bdrv_new();
-
-    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
-                    flags, &local_err);
+    bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
     snapshot_options = NULL;
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    if (!bs_snapshot) {
+        ret = -EINVAL;
         goto out;
     }
 
+    /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will
+     * call bdrv_unref() on it), so in order to be able to return one, we have
+     * to increase bs_snapshot's refcount here */
+    bdrv_ref(bs_snapshot);
     bdrv_append(bs_snapshot, bs);
 
+    g_free(tmp_filename);
+    return bs_snapshot;
+
 out:
     QDECREF(snapshot_options);
     g_free(tmp_filename);
-    return ret;
+    return NULL;
 }
 
 /*
@@ -1503,10 +1543,12 @@ out:
  * should be opened. If specified, neither options nor a filename may be given,
  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
  */
-static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
-                             const char *reference, QDict *options, int flags,
-                             BlockDriverState *parent,
-                             const BdrvChildRole *child_role, Error **errp)
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+                                           const char *reference,
+                                           QDict *options, int flags,
+                                           BlockDriverState *parent,
+                                           const BdrvChildRole *child_role,
+                                           Error **errp)
 {
     int ret;
     BdrvChild *file = NULL;
@@ -1518,7 +1560,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     QDict *snapshot_options = NULL;
     int snapshot_flags = 0;
 
-    assert(pbs);
     assert(!child_role || !flags);
     assert(!child_role == !parent);
 
@@ -1526,33 +1567,22 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         bool options_non_empty = options ? qdict_size(options) : false;
         QDECREF(options);
 
-        if (*pbs) {
-            error_setg(errp, "Cannot reuse an existing BDS when referencing "
-                       "another block device");
-            return -EINVAL;
-        }
-
         if (filename || options_non_empty) {
             error_setg(errp, "Cannot reference an existing block device with "
                        "additional options or a new filename");
-            return -EINVAL;
+            return NULL;
         }
 
         bs = bdrv_lookup_bs(reference, reference, errp);
         if (!bs) {
-            return -ENODEV;
+            return NULL;
         }
 
         bdrv_ref(bs);
-        *pbs = bs;
-        return 0;
+        return bs;
     }
 
-    if (*pbs) {
-        bs = *pbs;
-    } else {
-        bs = bdrv_new();
-    }
+    bs = bdrv_new();
 
     /* NULL means an empty set of options */
     if (options == NULL) {
@@ -1562,7 +1592,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     /* json: syntax counts as explicit options, as if in the QDict */
     parse_json_protocol(options, &filename, &local_err);
     if (local_err) {
-        ret = -EINVAL;
         goto fail;
     }
 
@@ -1589,7 +1618,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         drv = bdrv_find_format(drvname);
         if (!drv) {
             error_setg(errp, "Unknown driver: '%s'", drvname);
-            ret = -EINVAL;
             goto fail;
         }
     }
@@ -1619,7 +1647,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         file = bdrv_open_child(filename, options, "file", bs,
                                &child_file, true, &local_err);
         if (local_err) {
-            ret = -EINVAL;
             goto fail;
         }
     }
@@ -1646,7 +1673,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         qdict_put(options, "driver", qstring_from_str(drv->format_name));
     } else if (!drv) {
         error_setg(errp, "Must specify either driver or file");
-        ret = -EINVAL;
         goto fail;
     }
 
@@ -1689,7 +1715,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                        drv->format_name, entry->key);
         }
 
-        ret = -EINVAL;
         goto close_and_fail;
     }
 
@@ -1700,25 +1725,30 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
         error_setg(errp,
                    "Guest must be stopped for opening of encrypted image");
-        ret = -EBUSY;
         goto close_and_fail;
     }
 
     QDECREF(options);
-    *pbs = bs;
 
     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
      * temporary snapshot afterwards. */
     if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
-                                        &local_err);
+        BlockDriverState *snapshot_bs;
+        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
+                                                snapshot_options, &local_err);
         snapshot_options = NULL;
         if (local_err) {
             goto close_and_fail;
         }
+        /* We are not going to return bs but the overlay on top of it
+         * (snapshot_bs); thus, we have to drop the strong reference to bs
+         * (which we obtained by calling bdrv_new()). bs will not be deleted,
+         * though, because the overlay still has a reference to it. */
+        bdrv_unref(bs);
+        bs = snapshot_bs;
     }
 
-    return 0;
+    return bs;
 
 fail:
     if (file != NULL) {
@@ -1729,36 +1759,26 @@ fail:
     QDECREF(bs->options);
     QDECREF(options);
     bs->options = NULL;
-    if (!*pbs) {
-        /* If *pbs is NULL, a new BDS has been created in this function and
-           needs to be freed now. Otherwise, it does not need to be closed,
-           since it has not really been opened yet. */
-        bdrv_unref(bs);
-    }
+    bdrv_unref(bs);
     if (local_err) {
         error_propagate(errp, local_err);
     }
-    return ret;
+    return NULL;
 
 close_and_fail:
-    /* See fail path, but now the BDS has to be always closed */
-    if (*pbs) {
-        bdrv_close(bs);
-    } else {
-        bdrv_unref(bs);
-    }
+    bdrv_unref(bs);
     QDECREF(snapshot_options);
     QDECREF(options);
     if (local_err) {
         error_propagate(errp, local_err);
     }
-    return ret;
+    return NULL;
 }
 
-int bdrv_open(BlockDriverState **pbs, const char *filename,
-              const char *reference, QDict *options, int flags, Error **errp)
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+                            QDict *options, int flags, Error **errp)
 {
-    return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
+    return bdrv_open_inherit(filename, reference, options, flags, NULL,
                              NULL, errp);
 }
 
@@ -2132,6 +2152,7 @@ static void bdrv_close(BlockDriverState *bs)
     BdrvAioNotifier *ban, *ban_next;
 
     assert(!bs->job);
+    assert(!bs->refcnt);
 
     bdrv_drained_begin(bs); /* complete I/O */
     bdrv_flush(bs);
@@ -2140,8 +2161,6 @@ static void bdrv_close(BlockDriverState *bs)
     bdrv_release_named_dirty_bitmaps(bs);
     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
-    bdrv_parent_cb_change_media(bs, false);
-
     if (bs->drv) {
         BdrvChild *child, *next;
 
@@ -2190,8 +2209,7 @@ static void bdrv_close(BlockDriverState *bs)
 
 void bdrv_close_all(void)
 {
-    BlockDriverState *bs;
-    AioContext *aio_context;
+    block_job_cancel_sync_all();
 
     /* Drop references from requests still in flight, such as canceled block
      * jobs whose AIO context has not been polled yet */
@@ -2200,25 +2218,7 @@ void bdrv_close_all(void)
     blk_remove_all_bs();
     blockdev_close_all_bdrv_states();
 
-    /* Cancel all block jobs */
-    while (!QTAILQ_EMPTY(&all_bdrv_states)) {
-        QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
-            aio_context = bdrv_get_aio_context(bs);
-
-            aio_context_acquire(aio_context);
-            if (bs->job) {
-                block_job_cancel_sync(bs->job);
-                aio_context_release(aio_context);
-                break;
-            }
-            aio_context_release(aio_context);
-        }
-
-        /* All the remaining BlockDriverStates are referenced directly or
-         * indirectly from block jobs, so there needs to be at least one BDS
-         * directly used by a block job */
-        assert(bs);
-    }
+    assert(QTAILQ_EMPTY(&all_bdrv_states));
 }
 
 static void change_parent_backing_link(BlockDriverState *from,
@@ -2228,10 +2228,8 @@ static void change_parent_backing_link(BlockDriverState *from,
 
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->role != &child_backing);
-        c->bs = to;
-        QLIST_REMOVE(c, next_parent);
-        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
         bdrv_ref(to);
+        bdrv_replace_child(c, to);
         bdrv_unref(from);
     }
 }
@@ -3195,9 +3193,9 @@ void bdrv_invalidate_cache_all(Error **errp)
 {
     BlockDriverState *bs;
     Error *local_err = NULL;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -3239,11 +3237,11 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
 int bdrv_inactivate_all(void)
 {
     BlockDriverState *bs = NULL;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
     int ret = 0;
     int pass;
 
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         aio_context_acquire(bdrv_get_aio_context(bs));
     }
 
@@ -3252,8 +3250,7 @@ int bdrv_inactivate_all(void)
      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
      * is allowed. */
     for (pass = 0; pass < 2; pass++) {
-        it = NULL;
-        while ((it = bdrv_next(it, &bs)) != NULL) {
+        for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             ret = bdrv_inactivate_recurse(bs, pass);
             if (ret < 0) {
                 goto out;
@@ -3262,8 +3259,7 @@ int bdrv_inactivate_all(void)
     }
 
 out:
-    it = NULL;
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         aio_context_release(bdrv_get_aio_context(bs));
     }
 
@@ -3547,11 +3543,10 @@ void bdrv_img_create(const char *filename, const char *fmt,
                           qstring_from_str(backing_fmt));
             }
 
-            bs = NULL;
-            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
-                            back_flags, &local_err);
+            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
+                           &local_err);
             g_free(full_backing);
-            if (ret < 0) {
+            if (!bs) {
                 goto out;
             }
             size = bdrv_getlength(bs);
@@ -3753,10 +3748,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 {
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
     /* walk down the bs forest recursively */
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         bool perm;
 
         /* try to recurse in this top level bs */
diff --git a/block/backup.c b/block/backup.c
index fec45e8212..feeb9f8bf2 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -36,7 +36,7 @@ typedef struct CowRequest {
 
 typedef struct BackupBlockJob {
     BlockJob common;
-    BlockDriverState *target;
+    BlockBackend *target;
     /* bitmap for sync=incremental */
     BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
     uint64_t sectors_read;
     unsigned long *done_bitmap;
     int64_t cluster_size;
+    NotifierWithReturn before_write;
     QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
 
@@ -93,12 +94,12 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
-static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t sector_num, int nb_sectors,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BackupBlockJob *job = (BackupBlockJob *)bs->job;
+    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
     struct iovec iov;
     QEMUIOVector bounce_qiov;
@@ -131,20 +132,15 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
                 start * sectors_per_cluster);
 
         if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, job->cluster_size);
+            bounce_buffer = blk_blockalign(blk, job->cluster_size);
         }
         iov.iov_base = bounce_buffer;
         iov.iov_len = n * BDRV_SECTOR_SIZE;
         qemu_iovec_init_external(&bounce_qiov, &iov, 1);
 
-        if (is_write_notifier) {
-            ret = bdrv_co_readv_no_serialising(bs,
-                                           start * sectors_per_cluster,
-                                           n, &bounce_qiov);
-        } else {
-            ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
-                                &bounce_qiov);
-        }
+        ret = blk_co_preadv(blk, start * job->cluster_size,
+                            bounce_qiov.size, &bounce_qiov,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
         if (ret < 0) {
             trace_backup_do_cow_read_fail(job, start, ret);
             if (error_is_read) {
@@ -154,13 +150,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
         }
 
         if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = bdrv_co_write_zeroes(job->target,
-                                       start * sectors_per_cluster,
-                                       n, BDRV_REQ_MAY_UNMAP);
+            ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
+                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
         } else {
-            ret = bdrv_co_writev(job->target,
-                                 start * sectors_per_cluster, n,
-                                 &bounce_qiov);
+            ret = blk_co_pwritev(job->target, start * job->cluster_size,
+                                 bounce_qiov.size, &bounce_qiov, 0);
         }
         if (ret < 0) {
             trace_backup_do_cow_write_fail(job, start, ret);
@@ -197,14 +191,16 @@ static int coroutine_fn backup_before_write_notify(
         NotifierWithReturn *notifier,
         void *opaque)
 {
+    BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
     BdrvTrackedRequest *req = opaque;
     int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
     int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
 
+    assert(req->bs == blk_bs(job->common.blk));
     assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
     assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
+    return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
 }
 
 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -221,7 +217,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 {
     BdrvDirtyBitmap *bm;
-    BlockDriverState *bs = job->common.bs;
+    BlockDriverState *bs = blk_bs(job->common.blk);
 
     if (ret < 0 || block_job_is_cancelled(&job->common)) {
         /* Merge the successor back into the parent, delete nothing. */
@@ -279,7 +275,7 @@ static void backup_complete(BlockJob *job, void *opaque)
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
     BackupCompleteData *data = opaque;
 
-    bdrv_unref(s->target);
+    blk_unref(s->target);
 
     block_job_completed(job, data->ret);
     g_free(data);
@@ -321,7 +317,6 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
     int64_t end;
     int64_t last_cluster = -1;
     int64_t sectors_per_cluster = cluster_size_sectors(job);
-    BlockDriverState *bs = job->common.bs;
     HBitmapIter hbi;
 
     granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
@@ -343,7 +338,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                 if (yield_and_check(job)) {
                     return ret;
                 }
-                ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+                ret = backup_do_cow(job, cluster * sectors_per_cluster,
                                     sectors_per_cluster, &error_is_read,
                                     false);
                 if ((ret < 0) &&
@@ -376,11 +371,8 @@ static void coroutine_fn backup_run(void *opaque)
 {
     BackupBlockJob *job = opaque;
     BackupCompleteData *data;
-    BlockDriverState *bs = job->common.bs;
-    BlockDriverState *target = job->target;
-    NotifierWithReturn before_write = {
-        .notify = backup_before_write_notify,
-    };
+    BlockDriverState *bs = blk_bs(job->common.blk);
+    BlockBackend *target = job->target;
     int64_t start, end;
     int64_t sectors_per_cluster = cluster_size_sectors(job);
     int ret = 0;
@@ -393,7 +385,8 @@ static void coroutine_fn backup_run(void *opaque)
 
     job->done_bitmap = bitmap_new(end);
 
-    bdrv_add_before_write_notifier(bs, &before_write);
+    job->before_write.notify = backup_before_write_notify;
+    bdrv_add_before_write_notifier(bs, &job->before_write);
 
     if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
         while (!block_job_is_cancelled(&job->common)) {
@@ -445,7 +438,7 @@ static void coroutine_fn backup_run(void *opaque)
                 }
             }
             /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * sectors_per_cluster,
+            ret = backup_do_cow(job, start * sectors_per_cluster,
                                 sectors_per_cluster, &error_is_read, false);
             if (ret < 0) {
                 /* Depending on error action, fail now or retry cluster */
@@ -461,14 +454,14 @@ static void coroutine_fn backup_run(void *opaque)
         }
     }
 
-    notifier_with_return_remove(&before_write);
+    notifier_with_return_remove(&job->before_write);
 
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
     g_free(job->done_bitmap);
 
-    bdrv_op_unblock_all(target, job->common.blocker);
+    bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -485,6 +478,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
 {
     int64_t len;
     BlockDriverInfo bdi;
+    BackupBlockJob *job = NULL;
     int ret;
 
     assert(bs);
@@ -542,15 +536,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         goto error;
     }
 
-    BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
-                                           cb, opaque, errp);
+    job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp);
     if (!job) {
         goto error;
     }
 
+    job->target = blk_new();
+    blk_insert_bs(job->target, target);
+
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
-    job->target = target;
     job->sync_mode = sync_mode;
     job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                        sync_bitmap : NULL;
@@ -558,7 +553,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     /* If there is no backing file on the target, we cannot rely on COW if our
      * backup cluster size is smaller than the target cluster size. Even for
      * targets with a backing file, try to avoid COW if possible. */
-    ret = bdrv_get_info(job->target, &bdi);
+    ret = bdrv_get_info(target, &bdi);
     if (ret < 0 && !target->backing) {
         error_setg_errno(errp, -ret,
             "Couldn't determine the cluster size of the target image, "
@@ -584,4 +579,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     if (sync_bitmap) {
         bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
     }
+    if (job) {
+        blk_unref(job->target);
+        block_job_unref(&job->common);
+    }
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index 6928d61de4..34500e6080 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -19,6 +19,7 @@
 #include "sysemu/sysemu.h"
 #include "qapi-event.h"
 #include "qemu/id.h"
+#include "trace.h"
 
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
@@ -119,12 +120,14 @@ static const BdrvChildRole child_root = {
  * Store an error through @errp on failure, unless it's null.
  * Return the new BlockBackend on success, null on failure.
  */
-BlockBackend *blk_new(Error **errp)
+BlockBackend *blk_new(void)
 {
     BlockBackend *blk;
 
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
+    blk_set_enable_write_cache(blk, true);
+
     qemu_co_queue_init(&blk->public.throttled_reqs[0]);
     qemu_co_queue_init(&blk->public.throttled_reqs[1]);
 
@@ -136,27 +139,7 @@ BlockBackend *blk_new(Error **errp)
 }
 
 /*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(Error **errp)
-{
-    BlockBackend *blk;
-    BlockDriverState *bs;
-
-    blk = blk_new(errp);
-    if (!blk) {
-        return NULL;
-    }
-
-    bs = bdrv_new_root();
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    blk->root->opaque = blk;
-    return blk;
-}
-
-/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
+ * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
  *
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
@@ -171,21 +154,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp)
 {
     BlockBackend *blk;
-    int ret;
-
-    blk = blk_new_with_bs(errp);
-    if (!blk) {
-        QDECREF(options);
-        return NULL;
-    }
+    BlockDriverState *bs;
 
-    ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
-    if (ret < 0) {
+    blk = blk_new();
+    bs = bdrv_open(filename, reference, options, flags, errp);
+    if (!bs) {
         blk_unref(blk);
         return NULL;
     }
 
-    blk_set_enable_write_cache(blk, true);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
 
     return blk;
 }
@@ -286,25 +264,11 @@ BlockBackend *blk_next(BlockBackend *blk)
                : QTAILQ_FIRST(&monitor_block_backends);
 }
 
-struct BdrvNextIterator {
-    enum {
-        BDRV_NEXT_BACKEND_ROOTS,
-        BDRV_NEXT_MONITOR_OWNED,
-    } phase;
-    BlockBackend *blk;
-    BlockDriverState *bs;
-};
-
 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
  * the monitor or attached to a BlockBackend */
-BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
+BlockDriverState *bdrv_next(BdrvNextIterator *it)
 {
-    if (!it) {
-        it = g_new(BdrvNextIterator, 1);
-        *it = (BdrvNextIterator) {
-            .phase = BDRV_NEXT_BACKEND_ROOTS,
-        };
-    }
+    BlockDriverState *bs;
 
     /* First, return all root nodes of BlockBackends. In order to avoid
      * returning a BDS twice when multiple BBs refer to it, we only return it
@@ -312,11 +276,11 @@ BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
         do {
             it->blk = blk_all_next(it->blk);
-            *bs = it->blk ? blk_bs(it->blk) : NULL;
-        } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk));
+            bs = it->blk ? blk_bs(it->blk) : NULL;
+        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
 
-        if (*bs) {
-            return it;
+        if (bs) {
+            return bs;
         }
         it->phase = BDRV_NEXT_MONITOR_OWNED;
     }
@@ -326,10 +290,19 @@ BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
      * by the above block already */
     do {
         it->bs = bdrv_next_monitor_owned(it->bs);
-        *bs = it->bs;
-    } while (*bs && bdrv_has_blk(*bs));
+        bs = it->bs;
+    } while (bs && bdrv_has_blk(bs));
 
-    return *bs ? it : NULL;
+    return bs;
+}
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+    *it = (BdrvNextIterator) {
+        .phase = BDRV_NEXT_BACKEND_ROOTS,
+    };
+
+    return bdrv_next(it);
 }
 
 /*
@@ -509,8 +482,7 @@ void blk_remove_bs(BlockBackend *blk)
 void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
 {
     bdrv_ref(bs);
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    blk->root->opaque = blk;
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
 
     notifier_list_notify(&blk->insert_bs_notifiers, blk);
     if (blk->public.throttle_state) {
@@ -770,11 +742,15 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num,
                                   nb_sectors * BDRV_SECTOR_SIZE);
 }
 
-static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                                      unsigned int bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags)
 {
-    int ret = blk_check_byte_request(blk, offset, bytes);
+    int ret;
+
+    trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+
+    ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
     }
@@ -787,12 +763,14 @@ static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
     return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                                      unsigned int bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                                unsigned int bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags)
 {
     int ret;
 
+    trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+
     ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
@@ -885,8 +863,8 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
     return ret;
 }
 
-int blk_write_zeroes(BlockBackend *blk, int64_t offset,
-                     int count, BdrvRequestFlags flags)
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                      int count, BdrvRequestFlags flags)
 {
     return blk_prw(blk, offset, NULL, count, blk_write_entry,
                    flags | BDRV_REQ_ZERO_WRITE);
@@ -1001,9 +979,9 @@ static void blk_aio_write_entry(void *opaque)
     blk_aio_complete(acb);
 }
 
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
-                                 int count, BdrvRequestFlags flags,
-                                 BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                  int count, BdrvRequestFlags flags,
+                                  BlockCompletionFunc *cb, void *opaque)
 {
     return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                         flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
@@ -1492,8 +1470,8 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
     return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
 }
 
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
-                                     int count, BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                      int count, BdrvRequestFlags flags)
 {
     return blk_co_pwritev(blk, offset, count, NULL,
                           flags | BDRV_REQ_ZERO_WRITE);
@@ -1704,6 +1682,9 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
 
+    /* Note that blk->root may not be accessible here yet if we are just
+     * attaching to a BlockDriverState that is drained. Use child instead. */
+
     if (blk->public.io_limits_disabled++ == 0) {
         throttle_group_restart_blk(blk);
     }
diff --git a/block/commit.c b/block/commit.c
index f308c8c6f0..8a00e1146c 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,28 +36,36 @@ typedef struct CommitBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockDriverState *active;
-    BlockDriverState *top;
-    BlockDriverState *base;
+    BlockBackend *top;
+    BlockBackend *base;
     BlockdevOnError on_error;
     int base_flags;
     int orig_overlay_flags;
     char *backing_file_str;
 } CommitBlockJob;
 
-static int coroutine_fn commit_populate(BlockDriverState *bs,
-                                        BlockDriverState *base,
+static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
                                         int64_t sector_num, int nb_sectors,
                                         void *buf)
 {
     int ret = 0;
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = buf,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+    };
 
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    if (ret) {
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
+                        qiov.size, &qiov, 0);
+    if (ret < 0) {
         return ret;
     }
 
-    ret = bdrv_write(base, sector_num, buf, nb_sectors);
-    if (ret) {
+    ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
+                         qiov.size, &qiov, 0);
+    if (ret < 0) {
         return ret;
     }
 
@@ -73,8 +81,8 @@ static void commit_complete(BlockJob *job, void *opaque)
     CommitBlockJob *s = container_of(job, CommitBlockJob, common);
     CommitCompleteData *data = opaque;
     BlockDriverState *active = s->active;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
+    BlockDriverState *top = blk_bs(s->top);
+    BlockDriverState *base = blk_bs(s->base);
     BlockDriverState *overlay_bs;
     int ret = data->ret;
 
@@ -94,6 +102,8 @@ static void commit_complete(BlockJob *job, void *opaque)
         bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
     }
     g_free(s->backing_file_str);
+    blk_unref(s->top);
+    blk_unref(s->base);
     block_job_completed(&s->common, ret);
     g_free(data);
 }
@@ -102,8 +112,6 @@ static void coroutine_fn commit_run(void *opaque)
 {
     CommitBlockJob *s = opaque;
     CommitCompleteData *data;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
     int64_t sector_num, end;
     int ret = 0;
     int n = 0;
@@ -111,27 +119,27 @@ static void coroutine_fn commit_run(void *opaque)
     int bytes_written = 0;
     int64_t base_len;
 
-    ret = s->common.len = bdrv_getlength(top);
+    ret = s->common.len = blk_getlength(s->top);
 
 
     if (s->common.len < 0) {
         goto out;
     }
 
-    ret = base_len = bdrv_getlength(base);
+    ret = base_len = blk_getlength(s->base);
     if (base_len < 0) {
         goto out;
     }
 
     if (base_len < s->common.len) {
-        ret = bdrv_truncate(base, s->common.len);
+        ret = blk_truncate(s->base, s->common.len);
         if (ret) {
             goto out;
         }
     }
 
     end = s->common.len >> BDRV_SECTOR_BITS;
-    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
 
     for (sector_num = 0; sector_num < end; sector_num += n) {
         uint64_t delay_ns = 0;
@@ -146,7 +154,8 @@ wait:
             break;
         }
         /* Copy if allocated above the base */
-        ret = bdrv_is_allocated_above(top, base, sector_num,
+        ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
+                                      sector_num,
                                       COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
                                       &n);
         copy = (ret == 1);
@@ -158,7 +167,7 @@ wait:
                     goto wait;
                 }
             }
-            ret = commit_populate(top, base, sector_num, n, buf);
+            ret = commit_populate(s->top, s->base, sector_num, n, buf);
             bytes_written += n * BDRV_SECTOR_SIZE;
         }
         if (ret < 0) {
@@ -253,8 +262,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
         return;
     }
 
-    s->base   = base;
-    s->top    = top;
+    s->base = blk_new();
+    blk_insert_bs(s->base, base);
+
+    s->top = blk_new();
+    blk_insert_bs(s->top, top);
+
     s->active = bs;
 
     s->base_flags          = orig_base_flags;
diff --git a/block/io.c b/block/io.c
index 60a6bd8bdb..2d832aa532 100644
--- a/block/io.c
+++ b/block/io.c
@@ -225,6 +225,34 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
     assert(data.done);
 }
 
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+    if (!bs->quiesce_counter++) {
+        aio_disable_external(bdrv_get_aio_context(bs));
+        bdrv_parent_drained_begin(bs);
+    }
+
+    bdrv_io_unplugged_begin(bs);
+    bdrv_drain_recurse(bs);
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(bs);
+    } else {
+        bdrv_drain_poll(bs);
+    }
+    bdrv_io_unplugged_end(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+    assert(bs->quiesce_counter > 0);
+    if (--bs->quiesce_counter > 0) {
+        return;
+    }
+
+    bdrv_parent_drained_end(bs);
+    aio_enable_external(bdrv_get_aio_context(bs));
+}
+
 /*
  * Wait for pending requests to complete on a single BlockDriverState subtree,
  * and suspend block driver's internal I/O until next request arrives.
@@ -238,26 +266,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
  */
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
 {
-    bdrv_parent_drained_begin(bs);
-    bdrv_io_unplugged_begin(bs);
-    bdrv_drain_recurse(bs);
-    bdrv_co_yield_to_drain(bs);
-    bdrv_io_unplugged_end(bs);
-    bdrv_parent_drained_end(bs);
+    assert(qemu_in_coroutine());
+    bdrv_drained_begin(bs);
+    bdrv_drained_end(bs);
 }
 
 void bdrv_drain(BlockDriverState *bs)
 {
-    bdrv_parent_drained_begin(bs);
-    bdrv_io_unplugged_begin(bs);
-    bdrv_drain_recurse(bs);
-    if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs);
-    } else {
-        bdrv_drain_poll(bs);
-    }
-    bdrv_io_unplugged_end(bs);
-    bdrv_parent_drained_end(bs);
+    bdrv_drained_begin(bs);
+    bdrv_drained_end(bs);
 }
 
 /*
@@ -271,10 +288,10 @@ void bdrv_drain_all(void)
     /* Always run first iteration so any pending completion BHs run */
     bool busy = true;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
     GSList *aio_ctxs = NULL, *ctx;
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -302,10 +319,9 @@ void bdrv_drain_all(void)
 
         for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
             AioContext *aio_context = ctx->data;
-            it = NULL;
 
             aio_context_acquire(aio_context);
-            while ((it = bdrv_next(it, &bs))) {
+            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                 if (aio_context == bdrv_get_aio_context(bs)) {
                     if (bdrv_requests_pending(bs)) {
                         busy = true;
@@ -318,8 +334,7 @@ void bdrv_drain_all(void)
         }
     }
 
-    it = NULL;
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -1093,24 +1108,6 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
 }
 
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
-                            BDRV_REQ_NO_SERIALISING);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
-                            BDRV_REQ_COPY_ON_READ);
-}
-
 #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
 
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
@@ -2543,23 +2540,3 @@ void bdrv_io_unplugged_end(BlockDriverState *bs)
         }
     }
 }
-
-void bdrv_drained_begin(BlockDriverState *bs)
-{
-    if (!bs->quiesce_counter++) {
-        aio_disable_external(bdrv_get_aio_context(bs));
-    }
-    bdrv_parent_drained_begin(bs);
-    bdrv_drain(bs);
-}
-
-void bdrv_drained_end(BlockDriverState *bs)
-{
-    bdrv_parent_drained_end(bs);
-
-    assert(bs->quiesce_counter > 0);
-    if (--bs->quiesce_counter > 0) {
-        return;
-    }
-    aio_enable_external(bdrv_get_aio_context(bs));
-}
diff --git a/block/mirror.c b/block/mirror.c
index b9986d8218..80fd3c7469 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -20,7 +20,6 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
-#include "qemu/error-report.h"
 
 #define SLICE_TIME    100000000ULL /* ns */
 #define MAX_IN_FLIGHT 16
@@ -36,7 +35,7 @@ typedef struct MirrorBuffer {
 typedef struct MirrorBlockJob {
     BlockJob common;
     RateLimit limit;
-    BlockDriverState *target;
+    BlockBackend *target;
     BlockDriverState *base;
     /* The name of the graph node to replace */
     char *replaces;
@@ -157,7 +156,8 @@ static void mirror_read_complete(void *opaque, int ret)
         mirror_iteration_done(op, ret);
         return;
     }
-    bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+    blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+                    op->nb_sectors * BDRV_SECTOR_SIZE,
                     mirror_write_complete, op);
 }
 
@@ -186,7 +186,7 @@ static int mirror_cow_align(MirrorBlockJob *s,
     need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
                           s->cow_bitmap);
     if (need_cow) {
-        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+        bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
                                &align_sector_num, &align_nb_sectors);
     }
 
@@ -224,7 +224,7 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
 static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
                           int nb_sectors)
 {
-    BlockDriverState *source = s->common.bs;
+    BlockBackend *source = s->common.blk;
     int sectors_per_chunk, nb_chunks;
     int ret = nb_sectors;
     MirrorOp *op;
@@ -274,7 +274,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
-    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+    blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+                   nb_sectors * BDRV_SECTOR_SIZE,
                    mirror_read_complete, op);
     return ret;
 }
@@ -296,10 +297,11 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
     s->in_flight++;
     s->sectors_in_flight += nb_sectors;
     if (is_discard) {
-        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
-                         mirror_write_complete, op);
+        blk_aio_discard(s->target, sector_num, op->nb_sectors,
+                        mirror_write_complete, op);
     } else {
-        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
+        blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
+                              op->nb_sectors * BDRV_SECTOR_SIZE,
                               s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                               mirror_write_complete, op);
     }
@@ -307,7 +309,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
 
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-    BlockDriverState *source = s->common.bs;
+    BlockDriverState *source = blk_bs(s->common.blk);
     int64_t sector_num, first_chunk;
     uint64_t delay_ns = 0;
     /* At least the first dirty chunk is mirrored in one iteration. */
@@ -384,7 +386,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
             int64_t target_sector_num;
             int target_nb_sectors;
-            bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+            bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
                                    &target_sector_num, &target_nb_sectors);
             if (target_sector_num == sector_num &&
                 target_nb_sectors == io_sectors) {
@@ -449,7 +451,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
     MirrorExitData *data = opaque;
     AioContext *replace_aio_context = NULL;
-    BlockDriverState *src = s->common.bs;
+    BlockDriverState *src = blk_bs(s->common.blk);
+    BlockDriverState *target_bs = blk_bs(s->target);
 
     /* Make sure that the source BDS doesn't go away before we called
      * block_job_completed(). */
@@ -461,26 +464,25 @@ static void mirror_exit(BlockJob *job, void *opaque)
     }
 
     if (s->should_complete && data->ret == 0) {
-        BlockDriverState *to_replace = s->common.bs;
+        BlockDriverState *to_replace = src;
         if (s->to_replace) {
             to_replace = s->to_replace;
         }
 
-        /* This was checked in mirror_start_job(), but meanwhile one of the
-         * nodes could have been newly attached to a BlockBackend. */
-        if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) {
-            error_report("block job: Can't create node with two BlockBackends");
-            data->ret = -EINVAL;
-            goto out;
+        if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
         }
 
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
-            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
-        }
-        bdrv_replace_in_backing_chain(to_replace, s->target);
-    }
+        /* The mirror job has no requests in flight any more, but we need to
+         * drain potential other users of the BDS before changing the graph. */
+        bdrv_drained_begin(target_bs);
+        bdrv_replace_in_backing_chain(to_replace, target_bs);
+        bdrv_drained_end(target_bs);
 
-out:
+        /* We just changed the BDS the job BB refers to */
+        blk_remove_bs(job->blk);
+        blk_insert_bs(job->blk, src);
+    }
     if (s->to_replace) {
         bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
         error_free(s->replace_blocker);
@@ -490,8 +492,8 @@ out:
         aio_context_release(replace_aio_context);
     }
     g_free(s->replaces);
-    bdrv_op_unblock_all(s->target, s->common.blocker);
-    bdrv_unref(s->target);
+    bdrv_op_unblock_all(target_bs, s->common.blocker);
+    blk_unref(s->target);
     block_job_completed(&s->common, data->ret);
     g_free(data);
     bdrv_drained_end(src);
@@ -505,7 +507,8 @@ static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     MirrorExitData *data;
-    BlockDriverState *bs = s->common.bs;
+    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *target_bs = blk_bs(s->target);
     int64_t sector_num, end, length;
     uint64_t last_pause_ns;
     BlockDriverInfo bdi;
@@ -541,18 +544,18 @@ static void coroutine_fn mirror_run(void *opaque)
      * the destination do COW.  Instead, we copy sectors around the
      * dirty data if needed.  We need a bitmap to do that.
      */
-    bdrv_get_backing_filename(s->target, backing_filename,
+    bdrv_get_backing_filename(target_bs, backing_filename,
                               sizeof(backing_filename));
-    if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
+    if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
         target_cluster_size = bdi.cluster_size;
     }
-    if (backing_filename[0] && !s->target->backing
+    if (backing_filename[0] && !target_bs->backing
         && s->granularity < target_cluster_size) {
         s->buf_size = MAX(s->buf_size, target_cluster_size);
         s->cow_bitmap = bitmap_new(length);
     }
     s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
-    s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
+    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
 
     end = s->bdev_length / BDRV_SECTOR_SIZE;
     s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -567,7 +570,7 @@ static void coroutine_fn mirror_run(void *opaque)
     if (!s->is_none_mode) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
         BlockDriverState *base = s->base;
-        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
+        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs);
 
         for (sector_num = 0; sector_num < end; ) {
             /* Just to make sure we are not exceeding int limit. */
@@ -637,7 +640,7 @@ static void coroutine_fn mirror_run(void *opaque)
         should_complete = false;
         if (s->in_flight == 0 && cnt == 0) {
             trace_mirror_before_flush(s);
-            ret = bdrv_flush(s->target);
+            ret = blk_flush(s->target);
             if (ret < 0) {
                 if (mirror_error_action(s, false, -ret) ==
                     BLOCK_ERROR_ACTION_REPORT) {
@@ -715,7 +718,7 @@ immediate_exit:
     data->ret = ret;
     /* Before we switch to target in mirror_exit, make sure data doesn't
      * change. */
-    bdrv_drained_begin(s->common.bs);
+    bdrv_drained_begin(bs);
     if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
         /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
          * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
@@ -742,7 +745,8 @@ static void mirror_complete(BlockJob *job, Error **errp)
     Error *local_err = NULL;
     int ret;
 
-    ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err);
+    ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing",
+                                 &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
@@ -804,7 +808,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                              bool is_none_mode, BlockDriverState *base)
 {
     MirrorBlockJob *s;
-    BlockDriverState *replaced_bs;
 
     if (granularity == 0) {
         granularity = bdrv_get_default_bitmap_granularity(target);
@@ -821,30 +824,17 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
 
-    /* We can't support this case as long as the block layer can't handle
-     * multiple BlockBackends per BlockDriverState. */
-    if (replaces) {
-        replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
-        if (replaced_bs == NULL) {
-            return;
-        }
-    } else {
-        replaced_bs = bs;
-    }
-    if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) {
-        error_setg(errp, "Can't create node with two BlockBackends");
-        return;
-    }
-
     s = block_job_create(driver, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
     }
 
+    s->target = blk_new();
+    blk_insert_bs(s->target, target);
+
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
-    s->target = target;
     s->is_none_mode = is_none_mode;
     s->base = base;
     s->granularity = granularity;
@@ -854,11 +844,12 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
     if (!s->dirty_bitmap) {
         g_free(s->replaces);
+        blk_unref(s->target);
         block_job_unref(&s->common);
         return;
     }
 
-    bdrv_op_block_all(s->target, s->common.blocker);
+    bdrv_op_block_all(target, s->common.blocker);
 
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
@@ -931,7 +922,6 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
         }
     }
 
-    bdrv_ref(base);
     mirror_start_job(bs, base, NULL, speed, 0, 0,
                      on_error, on_error, false, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
diff --git a/block/parallels.c b/block/parallels.c
index 88cfacebe3..99fc0f77ef 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -517,8 +517,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
     if (ret < 0) {
         goto exit;
     }
-    ret = blk_write_zeroes(file, BDRV_SECTOR_SIZE,
-                           (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
+    ret = blk_pwrite_zeroes(file, BDRV_SECTOR_SIZE,
+                            (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
     if (ret < 0) {
         goto exit;
     }
diff --git a/block/snapshot.c b/block/snapshot.c
index 3917ec5c91..6e6e34fcf4 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -374,9 +374,9 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
 {
     bool ok = true;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (ok && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -384,8 +384,12 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
             ok = bdrv_can_snapshot(bs);
         }
         aio_context_release(ctx);
+        if (!ok) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return ok;
 }
@@ -395,20 +399,27 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
 {
     int ret = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
     QEMUSnapshotInfo sn1, *snapshot = &sn1;
 
-    while (ret == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
         if (bdrv_can_snapshot(bs) &&
                 bdrv_snapshot_find(bs, snapshot, name) >= 0) {
             ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
+            if (ret < 0) {
+                goto fail;
+            }
         }
         aio_context_release(ctx);
+        if (ret < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return ret;
 }
@@ -418,9 +429,9 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
 {
     int err = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (err == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -428,8 +439,12 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
             err = bdrv_snapshot_goto(bs, name);
         }
         aio_context_release(ctx);
+        if (err < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return err;
 }
@@ -439,9 +454,9 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
     QEMUSnapshotInfo sn;
     int err = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (err == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -449,8 +464,12 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
             err = bdrv_snapshot_find(bs, &sn, name);
         }
         aio_context_release(ctx);
+        if (err < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return err;
 }
@@ -462,9 +481,9 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
 {
     int err = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (err == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -476,24 +495,32 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
             err = bdrv_snapshot_create(bs, sn);
         }
         aio_context_release(ctx);
+        if (err < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return err;
 }
 
 BlockDriverState *bdrv_all_find_vmstate_bs(void)
 {
-    bool not_found = true;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (not_found && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
+        bool found;
 
         aio_context_acquire(ctx);
-        not_found = !bdrv_can_snapshot(bs);
+        found = bdrv_can_snapshot(bs);
         aio_context_release(ctx);
+
+        if (found) {
+            break;
+        }
     }
     return bs;
 }
diff --git a/block/stream.c b/block/stream.c
index 40aa32212e..c0efbda34e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -39,7 +39,7 @@ typedef struct StreamBlockJob {
     char *backing_file_str;
 } StreamBlockJob;
 
-static int coroutine_fn stream_populate(BlockDriverState *bs,
+static int coroutine_fn stream_populate(BlockBackend *blk,
                                         int64_t sector_num, int nb_sectors,
                                         void *buf)
 {
@@ -52,7 +52,8 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
     qemu_iovec_init_external(&qiov, &iov, 1);
 
     /* Copy-on-read the unallocated clusters */
-    return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
+    return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov,
+                         BDRV_REQ_COPY_ON_READ);
 }
 
 typedef struct {
@@ -64,6 +65,7 @@ static void stream_complete(BlockJob *job, void *opaque)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common);
     StreamCompleteData *data = opaque;
+    BlockDriverState *bs = blk_bs(job->blk);
     BlockDriverState *base = s->base;
 
     if (!block_job_is_cancelled(&s->common) && data->reached_end &&
@@ -75,8 +77,8 @@ static void stream_complete(BlockJob *job, void *opaque)
                 base_fmt = base->drv->format_name;
             }
         }
-        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        bdrv_set_backing_hd(job->bs, base);
+        data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+        bdrv_set_backing_hd(bs, base);
     }
 
     g_free(s->backing_file_str);
@@ -88,7 +90,8 @@ static void coroutine_fn stream_run(void *opaque)
 {
     StreamBlockJob *s = opaque;
     StreamCompleteData *data;
-    BlockDriverState *bs = s->common.bs;
+    BlockBackend *blk = s->common.blk;
+    BlockDriverState *bs = blk_bs(blk);
     BlockDriverState *base = s->base;
     int64_t sector_num = 0;
     int64_t end = -1;
@@ -159,7 +162,7 @@ wait:
                     goto wait;
                 }
             }
-            ret = stream_populate(bs, sector_num, n, buf);
+            ret = stream_populate(blk, sector_num, n, buf);
         }
         if (ret < 0) {
             BlockErrorAction action =
diff --git a/block/vvfat.c b/block/vvfat.c
index 3e484a1dcc..a39dbe67e2 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2998,12 +2998,12 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
         goto err;
     }
 
-    s->qcow = NULL;
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow"));
-    ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
-    if (ret < 0) {
+    s->qcow = bdrv_open(s->qcow_filename, NULL, options,
+                        BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
+    if (!s->qcow) {
+        ret = -EINVAL;
         goto err;
     }
 
diff --git a/blockdev.c b/blockdev.c
index 40e4e6fc6f..717785eb8d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -567,11 +567,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     if ((!file || !*file) && !qdict_size(bs_opts)) {
         BlockBackendRootState *blk_rs;
 
-        blk = blk_new(errp);
-        if (!blk) {
-            goto early_err;
-        }
-
+        blk = blk_new();
         blk_rs = blk_get_root_state(blk);
         blk_rs->open_flags    = bdrv_flags;
         blk_rs->read_only     = !(bdrv_flags & BDRV_O_RDWR);
@@ -657,7 +653,6 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
     QemuOpts *opts;
     Error *local_error = NULL;
     BlockdevDetectZeroesOptions detect_zeroes;
-    int ret;
     int bdrv_flags = 0;
 
     opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
@@ -688,9 +683,8 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
         bdrv_flags |= BDRV_O_INACTIVE;
     }
 
-    bs = NULL;
-    ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
-    if (ret < 0) {
+    bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+    if (!bs) {
         goto fail_no_bs_opts;
     }
 
@@ -1643,7 +1637,7 @@ typedef struct ExternalSnapshotState {
 static void external_snapshot_prepare(BlkActionState *common,
                                       Error **errp)
 {
-    int flags = 0, ret;
+    int flags = 0;
     QDict *options = NULL;
     Error *local_err = NULL;
     /* Device and node name of the image to generate the snapshot from */
@@ -1768,11 +1762,10 @@ static void external_snapshot_prepare(BlkActionState *common,
         flags |= BDRV_O_NO_BACKING;
     }
 
-    assert(state->new_bs == NULL);
-    ret = bdrv_open(&state->new_bs, new_image_file, snapshot_ref, options,
-                    flags, errp);
+    state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
+                              errp);
     /* We will manually add the backing_hd field to the bs later */
-    if (ret != 0) {
+    if (!state->new_bs) {
         return;
     }
 
@@ -2540,7 +2533,7 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
 {
     BlockBackend *blk;
     BlockDriverState *medium_bs = NULL;
-    int bdrv_flags, ret;
+    int bdrv_flags;
     QDict *options = NULL;
     Error *err = NULL;
 
@@ -2584,9 +2577,8 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    assert(!medium_bs);
-    ret = bdrv_open(&medium_bs, filename, NULL, options, bdrv_flags, errp);
-    if (ret < 0) {
+    medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
+    if (!medium_bs) {
         goto fail;
     }
 
@@ -3199,7 +3191,6 @@ static void do_drive_backup(const char *device, const char *target,
     Error *local_err = NULL;
     int flags;
     int64_t size;
-    int ret;
 
     if (!has_speed) {
         speed = 0;
@@ -3283,10 +3274,8 @@ static void do_drive_backup(const char *device, const char *target,
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, options, flags, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    target_bs = bdrv_open(target, NULL, options, flags, errp);
+    if (!target_bs) {
         goto out;
     }
 
@@ -3304,8 +3293,8 @@ static void do_drive_backup(const char *device, const char *target,
     backup_start(bs, target_bs, speed, sync, bmap,
                  on_source_error, on_target_error,
                  block_job_cb, bs, txn, &local_err);
+    bdrv_unref(target_bs);
     if (local_err != NULL) {
-        bdrv_unref(target_bs);
         error_propagate(errp, local_err);
         goto out;
     }
@@ -3389,12 +3378,10 @@ void do_blockdev_backup(const char *device, const char *target,
     }
     target_bs = blk_bs(target_blk);
 
-    bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
     backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
                  on_target_error, block_job_cb, bs, txn, &local_err);
     if (local_err != NULL) {
-        bdrv_unref(target_bs);
         error_propagate(errp, local_err);
     }
 out:
@@ -3470,10 +3457,6 @@ static void blockdev_mirror_common(BlockDriverState *bs,
     if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) {
         return;
     }
-    if (bdrv_has_blk(target)) {
-        error_setg(errp, "Cannot mirror to an attached block device");
-        return;
-    }
 
     if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
         sync = MIRROR_SYNC_MODE_FULL;
@@ -3511,7 +3494,6 @@ void qmp_drive_mirror(const char *device, const char *target,
     QDict *options = NULL;
     int flags;
     int64_t size;
-    int ret;
 
     blk = blk_by_name(device);
     if (!blk) {
@@ -3620,11 +3602,9 @@ void qmp_drive_mirror(const char *device, const char *target,
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
-    target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, options,
-                    flags | BDRV_O_NO_BACKING, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    target_bs = bdrv_open(target, NULL, options, flags | BDRV_O_NO_BACKING,
+                          errp);
+    if (!target_bs) {
         goto out;
     }
 
@@ -3639,9 +3619,9 @@ void qmp_drive_mirror(const char *device, const char *target,
                            has_on_target_error, on_target_error,
                            has_unmap, unmap,
                            &local_err);
+    bdrv_unref(target_bs);
     if (local_err) {
         error_propagate(errp, local_err);
-        bdrv_unref(target_bs);
     }
 out:
     aio_context_release(aio_context);
@@ -3685,7 +3665,6 @@ void qmp_blockdev_mirror(const char *device, const char *target,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
 
     blockdev_mirror_common(bs, target_bs,
@@ -3699,7 +3678,6 @@ void qmp_blockdev_mirror(const char *device, const char *target,
                            &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        bdrv_unref(target_bs);
     }
 
     aio_context_release(aio_context);
@@ -4164,9 +4142,9 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
     BlockJobInfoList *head = NULL, **p_next = &head;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
diff --git a/blockjob.c b/blockjob.c
index 5b840a7df6..c095cc57cb 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -50,17 +50,31 @@ struct BlockJobTxn {
     int refcnt;
 };
 
+static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
+
+BlockJob *block_job_next(BlockJob *job)
+{
+    if (!job) {
+        return QLIST_FIRST(&block_jobs);
+    }
+    return QLIST_NEXT(job, job_list);
+}
+
 void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
                        int64_t speed, BlockCompletionFunc *cb,
                        void *opaque, Error **errp)
 {
+    BlockBackend *blk;
     BlockJob *job;
 
     if (bs->job) {
         error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
         return NULL;
     }
-    bdrv_ref(bs);
+
+    blk = blk_new();
+    blk_insert_bs(blk, bs);
+
     job = g_malloc0(driver->instance_size);
     error_setg(&job->blocker, "block device is in use by block job: %s",
                BlockJobType_lookup[driver->job_type]);
@@ -69,13 +83,15 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
 
     job->driver        = driver;
     job->id            = g_strdup(bdrv_get_device_name(bs));
-    job->bs            = bs;
+    job->blk           = blk;
     job->cb            = cb;
     job->opaque        = opaque;
     job->busy          = true;
     job->refcnt        = 1;
     bs->job = job;
 
+    QLIST_INSERT_HEAD(&block_jobs, job, job_list);
+
     /* Only set speed when necessary to avoid NotSupported error */
     if (speed != 0) {
         Error *local_err = NULL;
@@ -98,11 +114,13 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
     if (--job->refcnt == 0) {
-        job->bs->job = NULL;
-        bdrv_op_unblock_all(job->bs, job->blocker);
-        bdrv_unref(job->bs);
+        BlockDriverState *bs = blk_bs(job->blk);
+        bs->job = NULL;
+        bdrv_op_unblock_all(bs, job->blocker);
+        blk_unref(job->blk);
         error_free(job->blocker);
         g_free(job->id);
+        QLIST_REMOVE(job, job_list);
         g_free(job);
     }
 }
@@ -140,7 +158,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
     txn->aborting = true;
     /* We are the first failed job. Cancel other jobs. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        ctx = bdrv_get_aio_context(other_job->bs);
+        ctx = blk_get_aio_context(other_job->blk);
         aio_context_acquire(ctx);
     }
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
@@ -157,7 +175,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
         assert(other_job->completed);
     }
     QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-        ctx = bdrv_get_aio_context(other_job->bs);
+        ctx = blk_get_aio_context(other_job->blk);
         block_job_completed_single(other_job);
         aio_context_release(ctx);
     }
@@ -179,7 +197,7 @@ static void block_job_completed_txn_success(BlockJob *job)
     }
     /* We are the last completed job, commit the transaction. */
     QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-        ctx = bdrv_get_aio_context(other_job->bs);
+        ctx = blk_get_aio_context(other_job->blk);
         aio_context_acquire(ctx);
         assert(other_job->ret == 0);
         block_job_completed_single(other_job);
@@ -189,9 +207,7 @@ static void block_job_completed_txn_success(BlockJob *job)
 
 void block_job_completed(BlockJob *job, int ret)
 {
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
+    assert(blk_bs(job->blk)->job == job);
     assert(!job->completed);
     job->completed = true;
     job->ret = ret;
@@ -282,11 +298,10 @@ static int block_job_finish_sync(BlockJob *job,
                                  void (*finish)(BlockJob *, Error **errp),
                                  Error **errp)
 {
-    BlockDriverState *bs = job->bs;
     Error *local_err = NULL;
     int ret;
 
-    assert(bs->job == job);
+    assert(blk_bs(job->blk)->job == job);
 
     block_job_ref(job);
     finish(job, &local_err);
@@ -297,7 +312,7 @@ static int block_job_finish_sync(BlockJob *job,
     }
     while (!job->completed) {
         aio_poll(job->deferred_to_main_loop ? qemu_get_aio_context() :
-                                              bdrv_get_aio_context(bs),
+                                              blk_get_aio_context(job->blk),
                  true);
     }
     ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
@@ -318,6 +333,19 @@ int block_job_cancel_sync(BlockJob *job)
     return block_job_finish_sync(job, &block_job_cancel_err, NULL);
 }
 
+void block_job_cancel_sync_all(void)
+{
+    BlockJob *job;
+    AioContext *aio_context;
+
+    while ((job = QLIST_FIRST(&block_jobs))) {
+        aio_context = blk_get_aio_context(job->blk);
+        aio_context_acquire(aio_context);
+        block_job_cancel_sync(job);
+        aio_context_release(aio_context);
+    }
+}
+
 int block_job_complete_sync(BlockJob *job, Error **errp)
 {
     return block_job_finish_sync(job, &block_job_complete, errp);
@@ -336,7 +364,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
     if (block_job_is_paused(job)) {
         qemu_coroutine_yield();
     } else {
-        co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
+        co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
     }
     job->busy = true;
 }
@@ -465,7 +493,7 @@ static void block_job_defer_to_main_loop_bh(void *opaque)
     aio_context_acquire(data->aio_context);
 
     /* Fetch BDS AioContext again, in case it has changed */
-    aio_context = bdrv_get_aio_context(data->job->bs);
+    aio_context = blk_get_aio_context(data->job->blk);
     aio_context_acquire(aio_context);
 
     data->job->deferred_to_main_loop = false;
@@ -485,7 +513,7 @@ void block_job_defer_to_main_loop(BlockJob *job,
     BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
     data->job = job;
     data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
-    data->aio_context = bdrv_get_aio_context(job->bs);
+    data->aio_context = blk_get_aio_context(job->blk);
     data->fn = fn;
     data->opaque = opaque;
     job->deferred_to_main_loop = true;
diff --git a/cpu-exec.c b/cpu-exec.c
index 602d0c4d0c..f7c642f4a9 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -345,6 +345,15 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu,
         *last_tb = NULL;
         cpu->tb_flushed = false;
     }
+#ifndef CONFIG_USER_ONLY
+    /* We don't take care of direct jumps when address mapping changes in
+     * system emulation. So it's not safe to make a direct jump to a TB
+     * spanning two pages because the mapping for the second page can change.
+     */
+    if (tb->page_addr[1] != -1) {
+        *last_tb = NULL;
+    }
+#endif
     /* See if we can patch the calling TB. */
     if (*last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
         tb_add_jump(*last_tb, tb_exit, tb);
diff --git a/dma-helpers.c b/dma-helpers.c
index a6cc15f534..b521d84ebd 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -70,7 +70,7 @@ void qemu_sglist_destroy(QEMUSGList *qsg)
 
 typedef struct {
     BlockAIOCB common;
-    BlockBackend *blk;
+    AioContext *ctx;
     BlockAIOCB *acb;
     QEMUSGList *sg;
     uint64_t offset;
@@ -80,6 +80,7 @@ typedef struct {
     QEMUIOVector iov;
     QEMUBH *bh;
     DMAIOFunc *io_func;
+    void *io_func_opaque;
 } DMAAIOCB;
 
 static void dma_blk_cb(void *opaque, int ret);
@@ -154,8 +155,7 @@ static void dma_blk_cb(void *opaque, int ret)
 
     if (dbs->iov.size == 0) {
         trace_dma_map_wait(dbs);
-        dbs->bh = aio_bh_new(blk_get_aio_context(dbs->blk),
-                             reschedule_dma, dbs);
+        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
         cpu_register_map_client(dbs->bh);
         return;
     }
@@ -164,8 +164,8 @@ static void dma_blk_cb(void *opaque, int ret)
         qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
     }
 
-    dbs->acb = dbs->io_func(dbs->blk, dbs->offset, &dbs->iov, 0,
-                            dma_blk_cb, dbs);
+    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
+                            dma_blk_cb, dbs, dbs->io_func_opaque);
     assert(dbs->acb);
 }
 
@@ -191,23 +191,25 @@ static const AIOCBInfo dma_aiocb_info = {
     .cancel_async       = dma_aio_cancel,
 };
 
-BlockAIOCB *dma_blk_io(
-    BlockBackend *blk, QEMUSGList *sg, uint64_t sector_num,
-    DMAIOFunc *io_func, BlockCompletionFunc *cb,
+BlockAIOCB *dma_blk_io(AioContext *ctx,
+    QEMUSGList *sg, uint64_t offset,
+    DMAIOFunc *io_func, void *io_func_opaque,
+    BlockCompletionFunc *cb,
     void *opaque, DMADirection dir)
 {
-    DMAAIOCB *dbs = blk_aio_get(&dma_aiocb_info, blk, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
 
-    trace_dma_blk_io(dbs, blk, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
+    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
 
     dbs->acb = NULL;
-    dbs->blk = blk;
     dbs->sg = sg;
-    dbs->offset = sector_num << BDRV_SECTOR_BITS;
+    dbs->ctx = ctx;
+    dbs->offset = offset;
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
     dbs->dir = dir;
     dbs->io_func = io_func;
+    dbs->io_func_opaque = io_func_opaque;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
     dma_blk_cb(dbs, 0);
@@ -215,19 +217,39 @@ BlockAIOCB *dma_blk_io(
 }
 
 
+static
+BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
+                                 BlockCompletionFunc *cb, void *cb_opaque,
+                                 void *opaque)
+{
+    BlockBackend *blk = opaque;
+    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
+}
+
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
-                         QEMUSGList *sg, uint64_t sector,
+                         QEMUSGList *sg, uint64_t offset,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk, sg, sector, blk_aio_preadv, cb, opaque,
+    return dma_blk_io(blk_get_aio_context(blk),
+                      sg, offset, dma_blk_read_io_func, blk, cb, opaque,
                       DMA_DIRECTION_FROM_DEVICE);
 }
 
+static
+BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
+                                  BlockCompletionFunc *cb, void *cb_opaque,
+                                  void *opaque)
+{
+    BlockBackend *blk = opaque;
+    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
+}
+
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
-                          QEMUSGList *sg, uint64_t sector,
+                          QEMUSGList *sg, uint64_t offset,
                           void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk, sg, sector, blk_aio_pwritev, cb, opaque,
+    return dma_blk_io(blk_get_aio_context(blk),
+                      sg, offset, dma_blk_write_io_func, blk, cb, opaque,
                       DMA_DIRECTION_TO_DEVICE);
 }
 
diff --git a/docs/igd-assign.txt b/docs/igd-assign.txt
new file mode 100644
index 0000000000..e17bb50789
--- /dev/null
+++ b/docs/igd-assign.txt
@@ -0,0 +1,133 @@
+Intel Graphics Device (IGD) assignment with vfio-pci
+====================================================
+
+IGD has two different modes for assignment using vfio-pci:
+
+1) Universal Pass-Through (UPT) mode:
+
+   In this mode the IGD device is added as a *secondary* (ie. non-primary)
+   graphics device in combination with an emulated primary graphics device.
+   This mode *requires* guest driver support to remove the external
+   dependencies generally associated with IGD (see below).  Those guest
+   drivers only support this mode for Broadwell and newer IGD, according to
+   Intel.  Additionally, this mode by default, and as officially supported
+   by Intel, does not support direct video output.  The intention is to use
+   this mode either to provide hardware acceleration to the emulated graphics
+   or to use this mode in combination with guest-based remote access software,
+   for example VNC (see below for optional output support).  This mode
+   theoretically has no device specific handling dependencies on vfio-pci or
+   the VM firmware.
+
+2) "Legacy" mode:
+
+   In this mode the IGD device is intended to be the primary and exclusive
+   graphics device in the VM[1], as such QEMU does not facilitate any sort
+   of remote graphics to the VM in this mode.  A connected physical monitor
+   is the intended output device for IGD.  This mode includes several
+   requirements and restrictions:
+
+    * IGD must be given address 02.0 on the PCI root bus in the VM
+    * The host kernel must support vfio extensions for IGD (v4.6)
+    * vfio VGA support very likely needs to be enabled in the host kernel
+    * The VM firmware must support specific fw_cfg enablers for IGD
+    * The VM machine type must support a PCI host bridge at 00.0 (standard)
+    * The VM machine type must provide or allow to be created a special
+      ISA/LPC bridge device (vfio-pci-igd-lpc-bridge) on the root bus at
+      PCI address 1f.0.
+    * The IGD device must have a VGA ROM, either provided via the romfile
+      option or loaded automatically through vfio (standard).  rombar=0
+      will disable legacy mode support.
+    * Hotplug of the IGD device is not supported.
+    * The IGD device must be a SandyBridge or newer model device.
+
+For either mode, depending on the host kernel, the i915 driver in the host
+may generate faults and errors upon re-binding to an IGD device after it
+has been assigned to a VM.  It's therefore generally recommended to prevent
+such driver binding unless the host driver is known to work well for this.
+There are numerous ways to do this, i915 can be blacklisted on the host,
+the driver_override option can be used to ensure that only vfio-pci can bind
+to the device on the host[2], virsh nodedev-detach can be used to bind the
+device to vfio drivers and then managed='no' set in the VM xml to prevent
+re-binding to i915, etc.  Also note that IGD is also typically the primary
+graphics in the host and special options may be required beyond simply
+blacklisting i915 or using pci-stub/vfio-pci to take ownership of IGD as a
+PCI class device.  Lower level drivers exist that may still claim the device.
+It may therefore be necessary to use kernel boot options video=vesafb:off or
+video=efifb:off (depending on host BIOS/UEFI) or these can be combined to
+a catch-all, video=vesafb:off,efifb:off.  Error messages such as:
+
+    Failed to mmap 0000:00:02.0 BAR <>. Performance may be slow
+
+are a good indicator that such a problem exists.  The host files /proc/iomem
+and /proc/ioports are often useful for identifying drivers consuming ranges
+of the device to cause such conflicts.
+
+Additionally, IGD device are known to generate small numbers of DMAR faults
+when initially assigned.  It is believed that this is simply the IGD attempting
+to access the reserved GTT space after reset, which it no longer has access to
+when accessed from userspace.  So long as the DMAR faults are small in number
+and most importantly, not ongoing, these are not an indication of an error.
+
+Additionally++, analog VGA output (as opposed to digital outputs like HDMI,
+DVI, or DisplayPort) may be unsupported in some use cases.  In the author's
+experience, even DP to VGA adapters can be troublesome while adapters between
+digital formats work well.
+
+Usage
+=====
+The intention is for IGD assignment to be transparent for users and thus for
+management tools like libvirt.  To make use of legacy mode, simply remove all
+other graphics options and use "-nographic" and either "-vga none" or
+"-nodefaults", along with adding the device using vfio-pci:
+
+    -device vfio-pci,host=00:02.0,id=hostdev0,bus=pci.0,addr=0x2
+
+For UPT mode, retain the default emulated graphics and simply add the vfio-pci
+device making use of any other bus address other than 02.0.  libvirt will
+default to assigning the device a UPT compatible address while legacy mode
+users will need to manually edit the XML if using a tool like virt-manager
+where the VM device address is not expressly specified.
+
+An experimental vfio-pci option also exists to enable OpRegion, and thus
+external monitor support, for UPT mode.  This can be enabled by adding
+"x-igd-opregion=on" to the vfio-pci device options for the IGD device.  As
+with legacy mode, this requires the host to support features introduced in
+the v4.6 kernel.  If Intel chooses to embrace this support, the option may
+be made non-experimental in the future, opening it to libvirt support.
+
+Developer ABI
+=============
+Legacy mode IGD support imposes two fw_cfg requirements on the VM firmware:
+
+1) "etc/igd-opregion"
+
+   This fw_cfg file exposes the OpRegion for the IGD device.  A reserved
+   region should be created below 4GB (recommended 4KB alignment), sized
+   sufficient for the fw_cfg file size, and the content of this file copied
+   to it.  The dword based address of this reserved memory region must also
+   be written to the ASLS register at offset 0xFC on the IGD device.  It is
+   recommended that firmware should make use of this fw_cfg entry for any
+   PCI class VGA device with Intel vendor ID.  Multiple of such devices
+   within a VM is undefined.
+
+2) "etc/igd-bdsm-size"
+
+   This fw_cfg file contains an 8-byte, little endian integer indicating
+   the size of the reserved memory region required for IGD stolen memory.
+   Firmware must allocate a reserved memory below 4GB with required 1MB
+   alignment equal to this size.  Additionally the base address of this
+   reserved region must be written to the dword BDSM register in PCI config
+   space of the IGD device at offset 0x5C.  As this support is related to
+   running the IGD ROM, which has other dependencies on the device appearing
+   at guest address 00:02.0, it's expected that this fw_cfg file is only
+   relevant to a single PCI class VGA device with Intel vendor ID, appearing
+   at PCI bus address 00:02.0.
+
+Footnotes
+=========
+[1] Nothing precludes adding additional emulated or assigned graphics devices
+    as non-primary, other than the combination typically not working.  I only
+    intend to set user expectations, others are welcome to find working
+    combinations or fix whatever issues prevent this from working in the common
+    case.
+[2] # echo "vfio-pci" > /sys/bus/pci/devices/0000:00:02.0/driver_override
diff --git a/docs/migration.txt b/docs/migration.txt
index 90209ab294..6503c17685 100644
--- a/docs/migration.txt
+++ b/docs/migration.txt
@@ -403,8 +403,8 @@ listen thread:                     --- page -- page -- page -- page -- page --
 
 On receipt of CMD_PACKAGED (1)
    All the data associated with the package - the ( ... ) section in the
-diagram - is read into memory (into a QEMUSizedBuffer), and the main thread
-recurses into qemu_loadvm_state_main to process the contents of the package (2)
+diagram - is read into memory, and the main thread recurses into
+qemu_loadvm_state_main to process the contents of the package (2)
 which contains commands (3,6) and devices (4...)
 
 On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package)
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 4f4f60a0df..98b4b1a82c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1008,7 +1008,7 @@ ETEXI
 
     {
         .name       = "migrate_set_parameter",
-        .args_type  = "parameter:s,value:i",
+        .args_type  = "parameter:s,value:s",
         .params     = "parameter value",
         .help       = "Set the parameter for migration",
         .mhandler.cmd = hmp_migrate_set_parameter,
diff --git a/hmp.c b/hmp.c
index 9f9bcf9d83..a4b1d3d220 100644
--- a/hmp.c
+++ b/hmp.c
@@ -35,6 +35,7 @@
 #include "block/qapi.h"
 #include "qemu-io.h"
 #include "qemu/cutils.h"
+#include "qemu/error-report.h"
 
 #ifdef CONFIG_SPICE
 #include <spice/enums.h>
@@ -168,8 +169,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
     }
 
     if (info->has_status) {
-        monitor_printf(mon, "Migration status: %s\n",
+        monitor_printf(mon, "Migration status: %s",
                        MigrationStatus_lookup[info->status]);
+        if (info->status == MIGRATION_STATUS_FAILED &&
+            info->has_error_desc) {
+            monitor_printf(mon, " (%s)\n", info->error_desc);
+        } else {
+            monitor_printf(mon, "\n");
+        }
+
         monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
                        info->total_time);
         if (info->has_expected_downtime) {
@@ -286,6 +294,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, " %s: %" PRId64,
             MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT],
             params->cpu_throttle_increment);
+        monitor_printf(mon, " %s: '%s'",
+            MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS],
+            params->tls_creds ? : "");
+        monitor_printf(mon, " %s: '%s'",
+            MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME],
+            params->tls_hostname ? : "");
         monitor_printf(mon, "\n");
     }
 
@@ -1235,13 +1249,17 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict)
 void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
 {
     const char *param = qdict_get_str(qdict, "parameter");
-    int value = qdict_get_int(qdict, "value");
+    const char *valuestr = qdict_get_str(qdict, "value");
+    long valueint = 0;
     Error *err = NULL;
     bool has_compress_level = false;
     bool has_compress_threads = false;
     bool has_decompress_threads = false;
     bool has_cpu_throttle_initial = false;
     bool has_cpu_throttle_increment = false;
+    bool has_tls_creds = false;
+    bool has_tls_hostname = false;
+    bool use_int_value = false;
     int i;
 
     for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) {
@@ -1249,25 +1267,46 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
             switch (i) {
             case MIGRATION_PARAMETER_COMPRESS_LEVEL:
                 has_compress_level = true;
+                use_int_value = true;
                 break;
             case MIGRATION_PARAMETER_COMPRESS_THREADS:
                 has_compress_threads = true;
+                use_int_value = true;
                 break;
             case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
                 has_decompress_threads = true;
+                use_int_value = true;
                 break;
             case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL:
                 has_cpu_throttle_initial = true;
+                use_int_value = true;
                 break;
             case MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT:
                 has_cpu_throttle_increment = true;
                 break;
+            case MIGRATION_PARAMETER_TLS_CREDS:
+                has_tls_creds = true;
+                break;
+            case MIGRATION_PARAMETER_TLS_HOSTNAME:
+                has_tls_hostname = true;
+                break;
             }
-            qmp_migrate_set_parameters(has_compress_level, value,
-                                       has_compress_threads, value,
-                                       has_decompress_threads, value,
-                                       has_cpu_throttle_initial, value,
-                                       has_cpu_throttle_increment, value,
+
+            if (use_int_value) {
+                if (qemu_strtol(valuestr, NULL, 10, &valueint) < 0) {
+                    error_setg(&err, "Unable to parse '%s' as an int",
+                               valuestr);
+                    goto cleanup;
+                }
+            }
+
+            qmp_migrate_set_parameters(has_compress_level, valueint,
+                                       has_compress_threads, valueint,
+                                       has_decompress_threads, valueint,
+                                       has_cpu_throttle_initial, valueint,
+                                       has_cpu_throttle_increment, valueint,
+                                       has_tls_creds, valuestr,
+                                       has_tls_hostname, valuestr,
                                        &err);
             break;
         }
@@ -1277,6 +1316,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
         error_setg(&err, QERR_INVALID_PARAMETER, param);
     }
 
+ cleanup:
     if (err) {
         error_report_err(err);
     }
@@ -1533,6 +1573,9 @@ static void hmp_migrate_status_cb(void *opaque)
         if (status->is_block_migration) {
             monitor_printf(status->mon, "\n");
         }
+        if (info->has_error_desc) {
+            error_report("%s", info->error_desc);
+        }
         monitor_resume(status->mon);
         timer_del(status->timer);
         g_free(status);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 173988ee84..9faad29fad 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -239,7 +239,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
     uint64_t data_size = (uint64_t)nlb << data_shift;
-    uint64_t aio_slba  = slba << (data_shift - BDRV_SECTOR_BITS);
+    uint64_t data_offset = slba << data_shift;
     int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
     enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
 
@@ -258,8 +258,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     req->has_sg = true;
     dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
     req->aiocb = is_write ?
-        dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) :
-        dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req);
+        dma_blk_write(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req) :
+        dma_blk_read(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req);
 
     return NVME_NO_COMPLETE;
 }
diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
index 70951d4137..82a9ef84f8 100644
--- a/hw/core/Makefile.objs
+++ b/hw/core/Makefile.objs
@@ -1,5 +1,6 @@
 # core qdev-related obj files, also used by *-user:
 common-obj-y += qdev.o qdev-properties.o
+common-obj-y += bus.o
 common-obj-y += fw-path-provider.o
 # irq.o needed for qdev GPIO handling:
 common-obj-y += irq.o
diff --git a/hw/core/bus.c b/hw/core/bus.c
new file mode 100644
index 0000000000..3e3f8ac740
--- /dev/null
+++ b/hw/core/bus.c
@@ -0,0 +1,251 @@
+/*
+ *  Dynamic device configuration and creation -- buses.
+ *
+ *  Copyright (c) 2009 CodeSourcery
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "hw/qdev.h"
+#include "qapi/error.h"
+
+static void qbus_set_hotplug_handler_internal(BusState *bus, Object *handler,
+                                              Error **errp)
+{
+
+    object_property_set_link(OBJECT(bus), OBJECT(handler),
+                             QDEV_HOTPLUG_HANDLER_PROPERTY, errp);
+}
+
+void qbus_set_hotplug_handler(BusState *bus, DeviceState *handler, Error **errp)
+{
+    qbus_set_hotplug_handler_internal(bus, OBJECT(handler), errp);
+}
+
+void qbus_set_bus_hotplug_handler(BusState *bus, Error **errp)
+{
+    qbus_set_hotplug_handler_internal(bus, OBJECT(bus), errp);
+}
+
+int qbus_walk_children(BusState *bus,
+                       qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
+                       qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
+                       void *opaque)
+{
+    BusChild *kid;
+    int err;
+
+    if (pre_busfn) {
+        err = pre_busfn(bus, opaque);
+        if (err) {
+            return err;
+        }
+    }
+
+    QTAILQ_FOREACH(kid, &bus->children, sibling) {
+        err = qdev_walk_children(kid->child,
+                                 pre_devfn, pre_busfn,
+                                 post_devfn, post_busfn, opaque);
+        if (err < 0) {
+            return err;
+        }
+    }
+
+    if (post_busfn) {
+        err = post_busfn(bus, opaque);
+        if (err) {
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+static void qbus_realize(BusState *bus, DeviceState *parent, const char *name)
+{
+    const char *typename = object_get_typename(OBJECT(bus));
+    BusClass *bc;
+    char *buf;
+    int i, len, bus_id;
+
+    bus->parent = parent;
+
+    if (name) {
+        bus->name = g_strdup(name);
+    } else if (bus->parent && bus->parent->id) {
+        /* parent device has id -> use it plus parent-bus-id for bus name */
+        bus_id = bus->parent->num_child_bus;
+
+        len = strlen(bus->parent->id) + 16;
+        buf = g_malloc(len);
+        snprintf(buf, len, "%s.%d", bus->parent->id, bus_id);
+        bus->name = buf;
+    } else {
+        /* no id -> use lowercase bus type plus global bus-id for bus name */
+        bc = BUS_GET_CLASS(bus);
+        bus_id = bc->automatic_ids++;
+
+        len = strlen(typename) + 16;
+        buf = g_malloc(len);
+        len = snprintf(buf, len, "%s.%d", typename, bus_id);
+        for (i = 0; i < len; i++) {
+            buf[i] = qemu_tolower(buf[i]);
+        }
+        bus->name = buf;
+    }
+
+    if (bus->parent) {
+        QLIST_INSERT_HEAD(&bus->parent->child_bus, bus, sibling);
+        bus->parent->num_child_bus++;
+        object_property_add_child(OBJECT(bus->parent), bus->name, OBJECT(bus), NULL);
+        object_unref(OBJECT(bus));
+    } else if (bus != sysbus_get_default()) {
+        /* TODO: once all bus devices are qdevified,
+           only reset handler for main_system_bus should be registered here. */
+        qemu_register_reset(qbus_reset_all_fn, bus);
+    }
+}
+
+static void bus_unparent(Object *obj)
+{
+    BusState *bus = BUS(obj);
+    BusChild *kid;
+
+    while ((kid = QTAILQ_FIRST(&bus->children)) != NULL) {
+        DeviceState *dev = kid->child;
+        object_unparent(OBJECT(dev));
+    }
+    if (bus->parent) {
+        QLIST_REMOVE(bus, sibling);
+        bus->parent->num_child_bus--;
+        bus->parent = NULL;
+    } else {
+        assert(bus != sysbus_get_default()); /* main_system_bus is never freed */
+        qemu_unregister_reset(qbus_reset_all_fn, bus);
+    }
+}
+
+void qbus_create_inplace(void *bus, size_t size, const char *typename,
+                         DeviceState *parent, const char *name)
+{
+    object_initialize(bus, size, typename);
+    qbus_realize(bus, parent, name);
+}
+
+BusState *qbus_create(const char *typename, DeviceState *parent, const char *name)
+{
+    BusState *bus;
+
+    bus = BUS(object_new(typename));
+    qbus_realize(bus, parent, name);
+
+    return bus;
+}
+
+static bool bus_get_realized(Object *obj, Error **errp)
+{
+    BusState *bus = BUS(obj);
+
+    return bus->realized;
+}
+
+static void bus_set_realized(Object *obj, bool value, Error **errp)
+{
+    BusState *bus = BUS(obj);
+    BusClass *bc = BUS_GET_CLASS(bus);
+    BusChild *kid;
+    Error *local_err = NULL;
+
+    if (value && !bus->realized) {
+        if (bc->realize) {
+            bc->realize(bus, &local_err);
+        }
+
+        /* TODO: recursive realization */
+    } else if (!value && bus->realized) {
+        QTAILQ_FOREACH(kid, &bus->children, sibling) {
+            DeviceState *dev = kid->child;
+            object_property_set_bool(OBJECT(dev), false, "realized",
+                                     &local_err);
+            if (local_err != NULL) {
+                break;
+            }
+        }
+        if (bc->unrealize && local_err == NULL) {
+            bc->unrealize(bus, &local_err);
+        }
+    }
+
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    bus->realized = value;
+}
+
+static void qbus_initfn(Object *obj)
+{
+    BusState *bus = BUS(obj);
+
+    QTAILQ_INIT(&bus->children);
+    object_property_add_link(obj, QDEV_HOTPLUG_HANDLER_PROPERTY,
+                             TYPE_HOTPLUG_HANDLER,
+                             (Object **)&bus->hotplug_handler,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
+                             NULL);
+    object_property_add_bool(obj, "realized",
+                             bus_get_realized, bus_set_realized, NULL);
+}
+
+static char *default_bus_get_fw_dev_path(DeviceState *dev)
+{
+    return g_strdup(object_get_typename(OBJECT(dev)));
+}
+
+static void bus_class_init(ObjectClass *class, void *data)
+{
+    BusClass *bc = BUS_CLASS(class);
+
+    class->unparent = bus_unparent;
+    bc->get_fw_dev_path = default_bus_get_fw_dev_path;
+}
+
+static void qbus_finalize(Object *obj)
+{
+    BusState *bus = BUS(obj);
+
+    g_free((char *)bus->name);
+}
+
+static const TypeInfo bus_info = {
+    .name = TYPE_BUS,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(BusState),
+    .abstract = true,
+    .class_size = sizeof(BusClass),
+    .instance_init = qbus_initfn,
+    .instance_finalize = qbus_finalize,
+    .class_init = bus_class_init,
+};
+
+static void bus_register_types(void)
+{
+    type_register_static(&bus_info);
+}
+
+type_init(bus_register_types)
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index db41aa1f26..853162b670 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -109,24 +109,6 @@ void qdev_set_parent_bus(DeviceState *dev, BusState *bus)
     bus_add_child(bus, dev);
 }
 
-static void qbus_set_hotplug_handler_internal(BusState *bus, Object *handler,
-                                              Error **errp)
-{
-
-    object_property_set_link(OBJECT(bus), OBJECT(handler),
-                             QDEV_HOTPLUG_HANDLER_PROPERTY, errp);
-}
-
-void qbus_set_hotplug_handler(BusState *bus, DeviceState *handler, Error **errp)
-{
-    qbus_set_hotplug_handler_internal(bus, OBJECT(handler), errp);
-}
-
-void qbus_set_bus_hotplug_handler(BusState *bus, Error **errp)
-{
-    qbus_set_hotplug_handler_internal(bus, OBJECT(bus), errp);
-}
-
 /* Create a new device.  This only initializes the device state
    structure and allows properties to be set.  The device still needs
    to be realized.  See qdev-core.h.  */
@@ -595,40 +577,6 @@ BusState *qdev_get_child_bus(DeviceState *dev, const char *name)
     return NULL;
 }
 
-int qbus_walk_children(BusState *bus,
-                       qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
-                       qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
-                       void *opaque)
-{
-    BusChild *kid;
-    int err;
-
-    if (pre_busfn) {
-        err = pre_busfn(bus, opaque);
-        if (err) {
-            return err;
-        }
-    }
-
-    QTAILQ_FOREACH(kid, &bus->children, sibling) {
-        err = qdev_walk_children(kid->child,
-                                 pre_devfn, pre_busfn,
-                                 post_devfn, post_busfn, opaque);
-        if (err < 0) {
-            return err;
-        }
-    }
-
-    if (post_busfn) {
-        err = post_busfn(bus, opaque);
-        if (err) {
-            return err;
-        }
-    }
-
-    return 0;
-}
-
 int qdev_walk_children(DeviceState *dev,
                        qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn,
                        qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn,
@@ -685,129 +633,6 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id)
     return NULL;
 }
 
-static void qbus_realize(BusState *bus, DeviceState *parent, const char *name)
-{
-    const char *typename = object_get_typename(OBJECT(bus));
-    BusClass *bc;
-    char *buf;
-    int i, len, bus_id;
-
-    bus->parent = parent;
-
-    if (name) {
-        bus->name = g_strdup(name);
-    } else if (bus->parent && bus->parent->id) {
-        /* parent device has id -> use it plus parent-bus-id for bus name */
-        bus_id = bus->parent->num_child_bus;
-
-        len = strlen(bus->parent->id) + 16;
-        buf = g_malloc(len);
-        snprintf(buf, len, "%s.%d", bus->parent->id, bus_id);
-        bus->name = buf;
-    } else {
-        /* no id -> use lowercase bus type plus global bus-id for bus name */
-        bc = BUS_GET_CLASS(bus);
-        bus_id = bc->automatic_ids++;
-
-        len = strlen(typename) + 16;
-        buf = g_malloc(len);
-        len = snprintf(buf, len, "%s.%d", typename, bus_id);
-        for (i = 0; i < len; i++) {
-            buf[i] = qemu_tolower(buf[i]);
-        }
-        bus->name = buf;
-    }
-
-    if (bus->parent) {
-        QLIST_INSERT_HEAD(&bus->parent->child_bus, bus, sibling);
-        bus->parent->num_child_bus++;
-        object_property_add_child(OBJECT(bus->parent), bus->name, OBJECT(bus), NULL);
-        object_unref(OBJECT(bus));
-    } else if (bus != sysbus_get_default()) {
-        /* TODO: once all bus devices are qdevified,
-           only reset handler for main_system_bus should be registered here. */
-        qemu_register_reset(qbus_reset_all_fn, bus);
-    }
-}
-
-static void bus_unparent(Object *obj)
-{
-    BusState *bus = BUS(obj);
-    BusChild *kid;
-
-    while ((kid = QTAILQ_FIRST(&bus->children)) != NULL) {
-        DeviceState *dev = kid->child;
-        object_unparent(OBJECT(dev));
-    }
-    if (bus->parent) {
-        QLIST_REMOVE(bus, sibling);
-        bus->parent->num_child_bus--;
-        bus->parent = NULL;
-    } else {
-        assert(bus != sysbus_get_default()); /* main_system_bus is never freed */
-        qemu_unregister_reset(qbus_reset_all_fn, bus);
-    }
-}
-
-static bool bus_get_realized(Object *obj, Error **errp)
-{
-    BusState *bus = BUS(obj);
-
-    return bus->realized;
-}
-
-static void bus_set_realized(Object *obj, bool value, Error **errp)
-{
-    BusState *bus = BUS(obj);
-    BusClass *bc = BUS_GET_CLASS(bus);
-    BusChild *kid;
-    Error *local_err = NULL;
-
-    if (value && !bus->realized) {
-        if (bc->realize) {
-            bc->realize(bus, &local_err);
-        }
-
-        /* TODO: recursive realization */
-    } else if (!value && bus->realized) {
-        QTAILQ_FOREACH(kid, &bus->children, sibling) {
-            DeviceState *dev = kid->child;
-            object_property_set_bool(OBJECT(dev), false, "realized",
-                                     &local_err);
-            if (local_err != NULL) {
-                break;
-            }
-        }
-        if (bc->unrealize && local_err == NULL) {
-            bc->unrealize(bus, &local_err);
-        }
-    }
-
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    bus->realized = value;
-}
-
-void qbus_create_inplace(void *bus, size_t size, const char *typename,
-                         DeviceState *parent, const char *name)
-{
-    object_initialize(bus, size, typename);
-    qbus_realize(bus, parent, name);
-}
-
-BusState *qbus_create(const char *typename, DeviceState *parent, const char *name)
-{
-    BusState *bus;
-
-    bus = BUS(object_new(typename));
-    qbus_realize(bus, parent, name);
-
-    return bus;
-}
-
 static char *bus_get_fw_dev_path(BusState *bus, DeviceState *dev)
 {
     BusClass *bc = BUS_GET_CLASS(bus);
@@ -1315,55 +1140,8 @@ static const TypeInfo device_type_info = {
     .class_size = sizeof(DeviceClass),
 };
 
-static void qbus_initfn(Object *obj)
-{
-    BusState *bus = BUS(obj);
-
-    QTAILQ_INIT(&bus->children);
-    object_property_add_link(obj, QDEV_HOTPLUG_HANDLER_PROPERTY,
-                             TYPE_HOTPLUG_HANDLER,
-                             (Object **)&bus->hotplug_handler,
-                             object_property_allow_set_link,
-                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
-                             NULL);
-    object_property_add_bool(obj, "realized",
-                             bus_get_realized, bus_set_realized, NULL);
-}
-
-static char *default_bus_get_fw_dev_path(DeviceState *dev)
-{
-    return g_strdup(object_get_typename(OBJECT(dev)));
-}
-
-static void bus_class_init(ObjectClass *class, void *data)
-{
-    BusClass *bc = BUS_CLASS(class);
-
-    class->unparent = bus_unparent;
-    bc->get_fw_dev_path = default_bus_get_fw_dev_path;
-}
-
-static void qbus_finalize(Object *obj)
-{
-    BusState *bus = BUS(obj);
-
-    g_free((char *)bus->name);
-}
-
-static const TypeInfo bus_info = {
-    .name = TYPE_BUS,
-    .parent = TYPE_OBJECT,
-    .instance_size = sizeof(BusState),
-    .abstract = true,
-    .class_size = sizeof(BusClass),
-    .instance_init = qbus_initfn,
-    .instance_finalize = qbus_finalize,
-    .class_init = bus_class_init,
-};
-
 static void qdev_register_types(void)
 {
-    type_register_static(&bus_info);
     type_register_static(&device_type_info);
 }
 
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index f244bc01c9..502d4f1c7b 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1006,7 +1006,8 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs)
         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
                        &ncq_tfs->sglist, BLOCK_ACCT_READ);
         ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist,
-                                      ncq_tfs->lba, ncq_cb, ncq_tfs);
+                                      ncq_tfs->lba << BDRV_SECTOR_BITS,
+                                      ncq_cb, ncq_tfs);
         break;
     case WRITE_FPDMA_QUEUED:
         DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
@@ -1018,7 +1019,8 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs)
         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
                        &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
         ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist,
-                                       ncq_tfs->lba, ncq_cb, ncq_tfs);
+                                       ncq_tfs->lba << BDRV_SECTOR_BITS,
+                                       ncq_cb, ncq_tfs);
         break;
     default:
         DPRINTF(port, "error: unsupported NCQ command (0x%02x) received\n",
diff --git a/hw/ide/core.c b/hw/ide/core.c
index fe2bfba489..029f6b9b12 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -441,13 +441,14 @@ static void ide_issue_trim_cb(void *opaque, int ret)
     }
 }
 
-BlockAIOCB *ide_issue_trim(BlockBackend *blk,
-        int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags,
-        BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *ide_issue_trim(
+        int64_t offset, QEMUIOVector *qiov,
+        BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
 {
+    BlockBackend *blk = opaque;
     TrimAIOCB *iocb;
 
-    iocb = blk_aio_get(&trim_aiocb_info, blk, cb, opaque);
+    iocb = blk_aio_get(&trim_aiocb_info, blk, cb, cb_opaque);
     iocb->blk = blk;
     iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
     iocb->ret = 0;
@@ -799,6 +800,7 @@ static void ide_dma_cb(void *opaque, int ret)
     IDEState *s = opaque;
     int n;
     int64_t sector_num;
+    uint64_t offset;
     bool stay_active = false;
 
     if (ret == -ECANCELED) {
@@ -859,18 +861,20 @@ static void ide_dma_cb(void *opaque, int ret)
         return;
     }
 
+    offset = sector_num << BDRV_SECTOR_BITS;
     switch (s->dma_cmd) {
     case IDE_DMA_READ:
-        s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, sector_num,
+        s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, offset,
                                           ide_dma_cb, s);
         break;
     case IDE_DMA_WRITE:
-        s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, sector_num,
+        s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, offset,
                                            ide_dma_cb, s);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(s->blk, &s->sg, sector_num,
-                                        ide_issue_trim, ide_dma_cb, s,
+        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
+                                        &s->sg, offset,
+                                        ide_issue_trim, s->blk, ide_dma_cb, s,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
     default:
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index ceb9e5994a..773928af77 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -613,9 +613,9 @@ void ide_transfer_start(IDEState *s, uint8_t *buf, int size,
                         EndTransferFunc *end_transfer_func);
 void ide_transfer_stop(IDEState *s);
 void ide_set_inactive(IDEState *s, bool more);
-BlockAIOCB *ide_issue_trim(BlockBackend *blk,
-        int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags,
-        BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *ide_issue_trim(
+        int64_t offset, QEMUIOVector *qiov,
+        BlockCompletionFunc *cb, void *cb_opaque, void *opaque);
 BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num,
                                QEMUIOVector *iov, int nb_sectors,
                                BlockCompletionFunc *cb, void *opaque);
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index d7d9c0ff3a..42ad68a1c0 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -230,7 +230,7 @@ static void pmac_dma_trim(BlockBackend *blk,
     s->io_buffer_index += io->len;
     io->len = 0;
 
-    s->bus->dma->aiocb = ide_issue_trim(blk, offset, &io->iov, 0, cb, io);
+    s->bus->dma->aiocb = ide_issue_trim(offset, &io->iov, cb, io, blk);
 }
 
 static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index a8266f8ec7..8b2eebd4e3 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -110,6 +110,7 @@ typedef struct VIOsPAPRVLANDevice {
     hwaddr buf_list;
     uint32_t add_buf_ptr, use_buf_ptr, rx_bufs;
     hwaddr rxq_ptr;
+    QEMUTimer *rxp_timer;
     uint32_t compat_flags;             /* Compatability flags for migration */
     RxBufPool *rx_pool[RX_MAX_POOLS];  /* Receive buffer descriptor pools */
 } VIOsPAPRVLANDevice;
@@ -122,6 +123,21 @@ static int spapr_vlan_can_receive(NetClientState *nc)
 }
 
 /**
+ * The last 8 bytes of the receive buffer list page (that has been
+ * supplied by the guest with the H_REGISTER_LOGICAL_LAN call) contain
+ * a counter for frames that have been dropped because there was no
+ * suitable receive buffer available. This function is used to increase
+ * this counter by one.
+ */
+static void spapr_vlan_record_dropped_rx_frame(VIOsPAPRVLANDevice *dev)
+{
+    uint64_t cnt;
+
+    cnt = vio_ldq(&dev->sdev, dev->buf_list + 4096 - 8);
+    vio_stq(&dev->sdev, dev->buf_list + 4096 - 8, cnt + 1);
+}
+
+/**
  * Get buffer descriptor from one of our receive buffer pools
  */
 static vlan_bd_t spapr_vlan_get_rx_bd_from_pool(VIOsPAPRVLANDevice *dev,
@@ -206,7 +222,8 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
     }
 
     if (!dev->rx_bufs) {
-        return -1;
+        spapr_vlan_record_dropped_rx_frame(dev);
+        return 0;
     }
 
     if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) {
@@ -215,7 +232,8 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
         bd = spapr_vlan_get_rx_bd_from_page(dev, size);
     }
     if (!bd) {
-        return -1;
+        spapr_vlan_record_dropped_rx_frame(dev);
+        return 0;
     }
 
     dev->rx_bufs--;
@@ -266,6 +284,13 @@ static NetClientInfo net_spapr_vlan_info = {
     .receive = spapr_vlan_receive,
 };
 
+static void spapr_vlan_flush_rx_queue(void *opaque)
+{
+    VIOsPAPRVLANDevice *dev = opaque;
+
+    qemu_flush_queued_packets(qemu_get_queue(dev->nic));
+}
+
 static void spapr_vlan_reset_rx_pool(RxBufPool *rxp)
 {
     /*
@@ -302,6 +327,9 @@ static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp)
     dev->nic = qemu_new_nic(&net_spapr_vlan_info, &dev->nicconf,
                             object_get_typename(OBJECT(sdev)), sdev->qdev.id, dev);
     qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a);
+
+    dev->rxp_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, spapr_vlan_flush_rx_queue,
+                                  dev);
 }
 
 static void spapr_vlan_instance_init(Object *obj)
@@ -332,6 +360,11 @@ static void spapr_vlan_instance_finalize(Object *obj)
             dev->rx_pool[i] = NULL;
         }
     }
+
+    if (dev->rxp_timer) {
+        timer_del(dev->rxp_timer);
+        timer_free(dev->rxp_timer);
+    }
 }
 
 void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd)
@@ -629,7 +662,13 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
 
     dev->rx_bufs++;
 
-    qemu_flush_queued_packets(qemu_get_queue(dev->nic));
+    /*
+     * Give guest some more time to add additional RX buffers before we
+     * flush the receive queue, so that e.g. fragmented IP packets can
+     * be passed to the guest in one go later (instead of passing single
+     * fragments if there is only one receive buffer available).
+     */
+    timer_mod(dev->rxp_timer, qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + 500);
 
     return H_SUCCESS;
 }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index add68acfef..44e401ae99 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1842,6 +1842,10 @@ static void ppc_spapr_init(MachineState *machine)
         exit(1);
     }
     spapr->rtas_size = get_image_size(filename);
+    if (spapr->rtas_size < 0) {
+        error_report("Could not get size of LPAR rtas '%s'", filename);
+        exit(1);
+    }
     spapr->rtas_blob = g_malloc(spapr->rtas_size);
     if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
         error_report("Could not load LPAR rtas '%s'", filename);
@@ -2132,15 +2136,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
     int i, fdt_offset, fdt_size;
     void *fdt;
 
-    /*
-     * Check for DRC connectors and send hotplug notification to the
-     * guest only in case of hotplugged memory. This allows cold plugged
-     * memory to be specified at boot time.
-     */
-    if (!dev->hotplugged) {
-        return;
-    }
-
     for (i = 0; i < nr_lmbs; i++) {
         drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
                 addr/SPAPR_MEMORY_BLOCK_SIZE);
@@ -2154,7 +2149,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
         drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp);
         addr += SPAPR_MEMORY_BLOCK_SIZE;
     }
-    spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs);
+    /* send hotplug notification to the
+     * guest only in case of hotplugged memory
+     */
+    if (dev->hotplugged) {
+       spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs);
+    }
 }
 
 static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 722db91ffa..96bb0181a7 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -76,6 +76,37 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
     }
 }
 
+static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
+                                       uint32_t page_shift,
+                                       uint32_t nb_table,
+                                       int *fd,
+                                       bool need_vfio)
+{
+    uint64_t *table = NULL;
+    uint64_t window_size = (uint64_t)nb_table << page_shift;
+
+    if (kvm_enabled() && !(window_size >> 32)) {
+        table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
+    }
+
+    if (!table) {
+        *fd = -1;
+        table = g_malloc0(nb_table * sizeof(uint64_t));
+    }
+
+    trace_spapr_iommu_new_table(liobn, table, *fd);
+
+    return table;
+}
+
+static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
+{
+    if (!kvm_enabled() ||
+        (kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) {
+        g_free(table);
+    }
+}
+
 /* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
                                                bool is_write)
@@ -142,21 +173,13 @@ static MemoryRegionIOMMUOps spapr_iommu_ops = {
 static int spapr_tce_table_realize(DeviceState *dev)
 {
     sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
-    uint64_t window_size = (uint64_t)tcet->nb_table << tcet->page_shift;
-
-    if (kvm_enabled() && !(window_size >> 32)) {
-        tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
-                                              window_size,
-                                              &tcet->fd,
-                                              tcet->need_vfio);
-    }
 
-    if (!tcet->table) {
-        size_t table_size = tcet->nb_table * sizeof(uint64_t);
-        tcet->table = g_malloc0(table_size);
-    }
-
-    trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd);
+    tcet->fd = -1;
+    tcet->table = spapr_tce_alloc_table(tcet->liobn,
+                                        tcet->page_shift,
+                                        tcet->nb_table,
+                                        &tcet->fd,
+                                        tcet->need_vfio);
 
     memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
                              "iommu-spapr",
@@ -242,11 +265,8 @@ static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp)
 
     QLIST_REMOVE(tcet, list);
 
-    if (!kvm_enabled() ||
-        (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
-                                 tcet->nb_table) != 0)) {
-        g_free(tcet->table);
-    }
+    spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table);
+    tcet->fd = -1;
 }
 
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
@@ -278,7 +298,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     tcet->table[index] = tce;
 
     entry.target_as = &address_space_memory,
-    entry.iova = ioba & page_mask;
+    entry.iova = (ioba - tcet->bus_offset) & page_mask;
     entry.translated_addr = tce & page_mask;
     entry.addr_mask = ~page_mask;
     entry.perm = spapr_tce_iommu_access_flags(tce);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index e55b505c96..856aec7f51 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1093,13 +1093,11 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
         spapr_tce_set_need_vfio(tcet, true);
     }
 
-    if (dev->hotplugged) {
-        fdt = create_device_tree(&fdt_size);
-        fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
-        if (!fdt_start_offset) {
-            error_setg(errp, "Failed to create pci child device tree node");
-            goto out;
-        }
+    fdt = create_device_tree(&fdt_size);
+    fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
+    if (!fdt_start_offset) {
+        error_setg(errp, "Failed to create pci child device tree node");
+        goto out;
     }
 
     drck->attach(drc, DEVICE(pdev),
@@ -1816,7 +1814,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                      sizeof(interrupt_map)));
 
-    tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0));
+    tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
     if (!tcet) {
         return -1;
     }
diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c
index d772cfc7ea..e2d4e1af79 100644
--- a/hw/s390x/s390-skeys.c
+++ b/hw/s390x/s390-skeys.c
@@ -47,15 +47,11 @@ void s390_skeys_init(void)
     qdev_init_nofail(DEVICE(obj));
 }
 
-static void write_keys(QEMUFile *f, uint8_t *keys, uint64_t startgfn,
+static void write_keys(FILE *f, uint8_t *keys, uint64_t startgfn,
                        uint64_t count, Error **errp)
 {
     uint64_t curpage = startgfn;
     uint64_t maxpage = curpage + count - 1;
-    const char *fmt = "page=%03" PRIx64 ": key(%d) => ACC=%X, FP=%d, REF=%d,"
-                      " ch=%d, reserved=%d\n";
-    char buf[128];
-    int len;
 
     for (; curpage <= maxpage; curpage++) {
         uint8_t acc = (*keys & 0xF0) >> 4;
@@ -64,10 +60,9 @@ static void write_keys(QEMUFile *f, uint8_t *keys, uint64_t startgfn,
         int ch = (*keys & 0x02);
         int res = (*keys & 0x01);
 
-        len = snprintf(buf, sizeof(buf), fmt, curpage,
-                       *keys, acc, fp, ref, ch, res);
-        assert(len < sizeof(buf));
-        qemu_put_buffer(f, (uint8_t *)buf, len);
+        fprintf(f, "page=%03" PRIx64 ": key(%d) => ACC=%X, FP=%d, REF=%d,"
+                " ch=%d, reserved=%d\n",
+                curpage, *keys, acc, fp, ref, ch, res);
         keys++;
     }
 }
@@ -116,7 +111,8 @@ void qmp_dump_skeys(const char *filename, Error **errp)
     vaddr cur_gfn = 0;
     uint8_t *buf;
     int ret;
-    QEMUFile *f;
+    int fd;
+    FILE *f;
 
     /* Quick check to see if guest is using storage keys*/
     if (!skeyclass->skeys_enabled(ss)) {
@@ -125,8 +121,14 @@ void qmp_dump_skeys(const char *filename, Error **errp)
         return;
     }
 
-    f = qemu_fopen(filename, "wb");
+    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+    if (fd < 0) {
+        error_setg_file_open(errp, errno, filename);
+        return;
+    }
+    f = fdopen(fd, "wb");
     if (!f) {
+        close(fd);
         error_setg_file_open(errp, errno, filename);
         return;
     }
@@ -162,7 +164,7 @@ out_free:
     error_propagate(errp, lerr);
     g_free(buf);
 out:
-    qemu_fclose(f);
+    fclose(f);
 }
 
 static void qemu_s390_skeys_init(Object *obj)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index ce89c98b4e..8865da53e8 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -335,7 +335,8 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.resid -= r->req.sg->size;
-        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, r->sector,
+        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg,
+                                    r->sector << BDRV_SECTOR_BITS,
                                     scsi_dma_complete, r);
     } else {
         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
@@ -539,7 +540,8 @@ static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.resid -= r->req.sg->size;
-        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector,
+        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg,
+                                     r->sector << BDRV_SECTOR_BITS,
                                      scsi_dma_complete, r);
     } else {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
@@ -1778,7 +1780,7 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                          nb_sectors * s->qdev.blocksize,
                         BLOCK_ACCT_WRITE);
-        r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk,
+        r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
                                 r->req.cmd.lba * s->qdev.blocksize,
                                 nb_sectors * s->qdev.blocksize,
                                 flags, scsi_aio_complete, r);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 88154a1f03..e51ed3a348 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -260,14 +260,20 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
     VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
     VFIOContainer *container = giommu->container;
     IOMMUTLBEntry *iotlb = data;
+    hwaddr iova = iotlb->iova + giommu->iommu_offset;
     MemoryRegion *mr;
     hwaddr xlat;
     hwaddr len = iotlb->addr_mask + 1;
     void *vaddr;
     int ret;
 
-    trace_vfio_iommu_map_notify(iotlb->iova,
-                                iotlb->iova + iotlb->addr_mask);
+    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
+
+    if (iotlb->target_as != &address_space_memory) {
+        error_report("Wrong target AS \"%s\", only system memory is allowed",
+                     iotlb->target_as->name ? iotlb->target_as->name : "none");
+        return;
+    }
 
     /*
      * The IOMMU TLB entry we have just covers translation through
@@ -294,21 +300,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
 
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
         vaddr = memory_region_get_ram_ptr(mr) + xlat;
-        ret = vfio_dma_map(container, iotlb->iova,
+        ret = vfio_dma_map(container, iova,
                            iotlb->addr_mask + 1, vaddr,
                            !(iotlb->perm & IOMMU_WO) || mr->readonly);
         if (ret) {
             error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx", %p) = %d (%m)",
-                         container, iotlb->iova,
+                         container, iova,
                          iotlb->addr_mask + 1, vaddr, ret);
         }
     } else {
-        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
+        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
         if (ret) {
             error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx") = %d (%m)",
-                         container, iotlb->iova,
+                         container, iova,
                          iotlb->addr_mask + 1, ret);
         }
     }
@@ -380,6 +386,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
          */
         giommu = g_malloc0(sizeof(*giommu));
         giommu->iommu = section->mr;
+        giommu->iommu_offset = section->offset_within_address_space -
+                               section->offset_within_region;
         giommu->container = container;
         giommu->n.notify = vfio_iommu_map_notify;
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
@@ -433,6 +441,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     hwaddr iova, end;
+    Int128 llend, llsize;
     int ret;
 
     if (vfio_listener_skipped_section(section)) {
@@ -471,21 +480,25 @@ static void vfio_listener_region_del(MemoryListener *listener,
     }
 
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
-    end = (section->offset_within_address_space + int128_get64(section->size)) &
-          TARGET_PAGE_MASK;
+    llend = int128_make64(section->offset_within_address_space);
+    llend = int128_add(llend, section->size);
+    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
 
-    if (iova >= end) {
+    if (int128_ge(int128_make64(iova), llend)) {
         return;
     }
+    end = int128_get64(int128_sub(llend, int128_one()));
+
+    llsize = int128_sub(llend, int128_make64(iova));
 
-    trace_vfio_listener_region_del(iova, end - 1);
+    trace_vfio_listener_region_del(iova, end);
 
-    ret = vfio_dma_unmap(container, iova, end - iova);
+    ret = vfio_dma_unmap(container, iova, int128_get64(llsize));
     memory_region_unref(section->mr);
     if (ret) {
         error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                      "0x%"HWADDR_PRIx") = %d (%m)",
-                     container, iova, end - iova, ret);
+                     container, iova, int128_get64(llsize), ret);
     }
 }
 
@@ -499,6 +512,54 @@ static void vfio_listener_release(VFIOContainer *container)
     memory_listener_unregister(&container->listener);
 }
 
+static struct vfio_info_cap_header *
+vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
+{
+    struct vfio_info_cap_header *hdr;
+    void *ptr = info;
+
+    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
+        return NULL;
+    }
+
+    for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+        if (hdr->id == id) {
+            return hdr;
+        }
+    }
+
+    return NULL;
+}
+
+static void vfio_setup_region_sparse_mmaps(VFIORegion *region,
+                                           struct vfio_region_info *info)
+{
+    struct vfio_info_cap_header *hdr;
+    struct vfio_region_info_cap_sparse_mmap *sparse;
+    int i;
+
+    hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
+    if (!hdr) {
+        return;
+    }
+
+    sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);
+
+    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
+                                         region->nr, sparse->nr_areas);
+
+    region->nr_mmaps = sparse->nr_areas;
+    region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        region->mmaps[i].offset = sparse->areas[i].offset;
+        region->mmaps[i].size = sparse->areas[i].size;
+        trace_vfio_region_sparse_mmap_entry(i, region->mmaps[i].offset,
+                                            region->mmaps[i].offset +
+                                            region->mmaps[i].size);
+    }
+}
+
 int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                       int index, const char *name)
 {
@@ -525,11 +586,14 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
             region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
             !(region->size & ~qemu_real_host_page_mask)) {
 
-            region->nr_mmaps = 1;
-            region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+            vfio_setup_region_sparse_mmaps(region, info);
 
-            region->mmaps[0].offset = 0;
-            region->mmaps[0].size = region->size;
+            if (!region->nr_mmaps) {
+                region->nr_mmaps = 1;
+                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
+                region->mmaps[0].offset = 0;
+                region->mmaps[0].size = region->size;
+            }
         }
     }
 
@@ -1089,16 +1153,60 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
     *info = g_malloc0(argsz);
 
     (*info)->index = index;
+retry:
     (*info)->argsz = argsz;
 
     if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
         g_free(*info);
+        *info = NULL;
         return -errno;
     }
 
+    if ((*info)->argsz > argsz) {
+        argsz = (*info)->argsz;
+        *info = g_realloc(*info, argsz);
+
+        goto retry;
+    }
+
     return 0;
 }
 
+int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
+                             uint32_t subtype, struct vfio_region_info **info)
+{
+    int i;
+
+    for (i = 0; i < vbasedev->num_regions; i++) {
+        struct vfio_info_cap_header *hdr;
+        struct vfio_region_info_cap_type *cap_type;
+
+        if (vfio_get_region_info(vbasedev, i, info)) {
+            continue;
+        }
+
+        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
+        if (!hdr) {
+            g_free(*info);
+            continue;
+        }
+
+        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);
+
+        trace_vfio_get_dev_region(vbasedev->name, i,
+                                  cap_type->type, cap_type->subtype);
+
+        if (cap_type->type == type && cap_type->subtype == subtype) {
+            return 0;
+        }
+
+        g_free(*info);
+    }
+
+    *info = NULL;
+    return -ENODEV;
+}
+
 /*
  * Interfaces for IBM EEH (Enhanced Error Handling)
  */
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 49ecf1172a..35d32b78f4 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -11,9 +11,12 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "qapi/error.h"
+#include "hw/nvram/fw_cfg.h"
 #include "pci.h"
 #include "trace.h"
-#include "qemu/range.h"
 
 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
@@ -962,6 +965,643 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
 }
 
 /*
+ * Intel IGD support
+ *
+ * Obviously IGD is not a discrete device, this is evidenced not only by it
+ * being integrated into the CPU, but by the various chipset and BIOS
+ * dependencies that it brings along with it.  Intel is trying to move away
+ * from this and Broadwell and newer devices can run in what Intel calls
+ * "Universal Pass-Through" mode, or UPT.  Theoretically in UPT mode, nothing
+ * more is required beyond assigning the IGD device to a VM.  There are
+ * however support limitations to this mode.  It only supports IGD as a
+ * secondary graphics device in the VM and it doesn't officially support any
+ * physical outputs.
+ *
+ * The code here attempts to enable what we'll call legacy mode assignment,
+ * IGD retains most of the capabilities we expect for it to have on bare
+ * metal.  To enable this mode, the IGD device must be assigned to the VM
+ * at PCI address 00:02.0, it must have a ROM, it very likely needs VGA
+ * support, we must have VM BIOS support for reserving and populating some
+ * of the required tables, and we need to tweak the chipset with revisions
+ * and IDs and an LPC/ISA bridge device.  The intention is to make all of
+ * this happen automatically by installing the device at the correct VM PCI
+ * bus address.  If any of the conditions are not met, we cross our fingers
+ * and hope the user knows better.
+ *
+ * NB - It is possible to enable physical outputs in UPT mode by supplying
+ * an OpRegion table.  We don't do this by default because the guest driver
+ * behaves differently if an OpRegion is provided and no monitor is attached
+ * vs no OpRegion and a monitor being attached or not.  Effectively, if a
+ * headless setup is desired, the OpRegion gets in the way of that.
+ */
+
+/*
+ * This presumes the device is already known to be an Intel VGA device, so we
+ * take liberties in which device ID bits match which generation.  This should
+ * not be taken as an indication that all the devices are supported, or even
+ * supportable, some of them don't even support VT-d.
+ * See linux:include/drm/i915_pciids.h for IDs.
+ */
+static int igd_gen(VFIOPCIDevice *vdev)
+{
+    if ((vdev->device_id & 0xfff) == 0xa84) {
+        return 8; /* Broxton */
+    }
+
+    switch (vdev->device_id & 0xff00) {
+    /* Old, untested, unavailable, unknown */
+    case 0x0000:
+    case 0x2500:
+    case 0x2700:
+    case 0x2900:
+    case 0x2a00:
+    case 0x2e00:
+    case 0x3500:
+    case 0xa000:
+        return -1;
+    /* SandyBridge, IvyBridge, ValleyView, Haswell */
+    case 0x0100:
+    case 0x0400:
+    case 0x0a00:
+    case 0x0c00:
+    case 0x0d00:
+    case 0x0f00:
+        return 6;
+    /* BroadWell, CherryView, SkyLake, KabyLake */
+    case 0x1600:
+    case 0x1900:
+    case 0x2200:
+    case 0x5900:
+        return 8;
+    }
+
+    return 8; /* Assume newer is compatible */
+}
+
+typedef struct VFIOIGDQuirk {
+    struct VFIOPCIDevice *vdev;
+    uint32_t index;
+} VFIOIGDQuirk;
+
+#define IGD_GMCH 0x50 /* Graphics Control Register */
+#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
+#define IGD_ASLS 0xfc /* ASL Storage Register */
+
+/*
+ * The OpRegion includes the Video BIOS Table, which seems important for
+ * telling the driver what sort of outputs it has.  Without this, the device
+ * may work in the guest, but we may not get output.  This also requires BIOS
+ * support to reserve and populate a section of guest memory sufficient for
+ * the table and to write the base address of that memory to the ASLS register
+ * of the IGD device.
+ */
+int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
+                               struct vfio_region_info *info)
+{
+    int ret;
+
+    vdev->igd_opregion = g_malloc0(info->size);
+    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
+                info->size, info->offset);
+    if (ret != info->size) {
+        error_report("vfio: Error reading IGD OpRegion");
+        g_free(vdev->igd_opregion);
+        vdev->igd_opregion = NULL;
+        return -EINVAL;
+    }
+
+    /*
+     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
+     * allocate 32bit reserved memory for, copy these contents into, and write
+     * the reserved memory base address to the device ASLS register at 0xFC.
+     * Alignment of this reserved region seems flexible, but using a 4k page
+     * alignment seems to work well.  This interface assumes a single IGD
+     * device, which may be at VM address 00:02.0 in legacy mode or another
+     * address in UPT mode.
+     *
+     * NB, there may be future use cases discovered where the VM should have
+     * direct interaction with the host OpRegion, in which case the write to
+     * the ASLS register would trigger MemoryRegion setup to enable that.
+     */
+    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
+                    vdev->igd_opregion, info->size);
+
+    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
+
+    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
+    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
+    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
+
+    return 0;
+}
+
+/*
+ * The rather short list of registers that we copy from the host devices.
+ * The LPC/ISA bridge values are definitely needed to support the vBIOS, the
+ * host bridge values may or may not be needed depending on the guest OS.
+ * Since we're only munging revision and subsystem values on the host bridge,
+ * we don't require our own device.  The LPC/ISA bridge needs to be our very
+ * own though.
+ */
+typedef struct {
+    uint8_t offset;
+    uint8_t len;
+} IGDHostInfo;
+
+static const IGDHostInfo igd_host_bridge_infos[] = {
+    {PCI_REVISION_ID,         2},
+    {PCI_SUBSYSTEM_VENDOR_ID, 2},
+    {PCI_SUBSYSTEM_ID,        2},
+};
+
+static const IGDHostInfo igd_lpc_bridge_infos[] = {
+    {PCI_VENDOR_ID,           2},
+    {PCI_DEVICE_ID,           2},
+    {PCI_REVISION_ID,         2},
+    {PCI_SUBSYSTEM_VENDOR_ID, 2},
+    {PCI_SUBSYSTEM_ID,        2},
+};
+
+static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,
+                             struct vfio_region_info *info,
+                             const IGDHostInfo *list, int len)
+{
+    int i, ret;
+
+    for (i = 0; i < len; i++) {
+        ret = pread(vdev->vbasedev.fd, pdev->config + list[i].offset,
+                    list[i].len, info->offset + list[i].offset);
+        if (ret != list[i].len) {
+            error_report("IGD copy failed: %m");
+            return -errno;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Stuff a few values into the host bridge.
+ */
+static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
+                                  struct vfio_region_info *info)
+{
+    PCIBus *bus;
+    PCIDevice *host_bridge;
+    int ret;
+
+    bus = pci_device_root_bus(&vdev->pdev);
+    host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0));
+
+    if (!host_bridge) {
+        error_report("Can't find host bridge");
+        return -ENODEV;
+    }
+
+    ret = vfio_pci_igd_copy(vdev, host_bridge, info, igd_host_bridge_infos,
+                            ARRAY_SIZE(igd_host_bridge_infos));
+    if (!ret) {
+        trace_vfio_pci_igd_host_bridge_enabled(vdev->vbasedev.name);
+    }
+
+    return ret;
+}
+
+/*
+ * IGD LPC/ISA bridge support code.  The vBIOS needs this, but we can't write
+ * arbitrary values into just any bridge, so we must create our own.  We try
+ * to handle if the user has created it for us, which they might want to do
+ * to enable multifuction so we don't occupy the whole PCI slot.
+ */
+static void vfio_pci_igd_lpc_bridge_realize(PCIDevice *pdev, Error **errp)
+{
+    if (pdev->devfn != PCI_DEVFN(0x1f, 0)) {
+        error_setg(errp, "VFIO dummy ISA/LPC bridge must have address 1f.0");
+    }
+}
+
+static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment";
+    dc->hotpluggable = false;
+    k->realize = vfio_pci_igd_lpc_bridge_realize;
+    k->class_id = PCI_CLASS_BRIDGE_ISA;
+}
+
+static TypeInfo vfio_pci_igd_lpc_bridge_info = {
+    .name = "vfio-pci-igd-lpc-bridge",
+    .parent = TYPE_PCI_DEVICE,
+    .class_init = vfio_pci_igd_lpc_bridge_class_init,
+};
+
+static void vfio_pci_igd_register_types(void)
+{
+    type_register_static(&vfio_pci_igd_lpc_bridge_info);
+}
+
+type_init(vfio_pci_igd_register_types)
+
+static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
+                                 struct vfio_region_info *info)
+{
+    PCIDevice *lpc_bridge;
+    int ret;
+
+    lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
+                                 0, PCI_DEVFN(0x1f, 0));
+    if (!lpc_bridge) {
+        lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),
+                                 PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge");
+    }
+
+    ret = vfio_pci_igd_copy(vdev, lpc_bridge, info, igd_lpc_bridge_infos,
+                            ARRAY_SIZE(igd_lpc_bridge_infos));
+    if (!ret) {
+        trace_vfio_pci_igd_lpc_bridge_enabled(vdev->vbasedev.name);
+    }
+
+    return ret;
+}
+
+/*
+ * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE
+ * entry, older IGDs use 2MB and 32bit.  Each PTE maps a 4k page.  Therefore
+ * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index
+ * for programming the GTT.
+ *
+ * See linux:include/drm/i915_drm.h for shift and mask values.
+ */
+static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
+{
+    uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
+    int ggms, gen = igd_gen(vdev);
+
+    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
+    ggms = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
+    if (gen > 6) {
+        ggms = 1 << ggms;
+    }
+
+    ggms *= 1024 * 1024;
+
+    return (ggms / (4 * 1024)) * (gen < 8 ? 4 : 8);
+}
+
+/*
+ * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes.
+ * Somehow the host stolen memory range is used for this, but how the ROM gets
+ * it is a mystery, perhaps it's hardcoded into the ROM.  Thankfully though, it
+ * reprograms the GTT through the IOBAR where we can trap it and transpose the
+ * programming to the VM allocated buffer.  That buffer gets reserved by the VM
+ * firmware via the fw_cfg entry added below.  Here we're just monitoring the
+ * IOBAR address and data registers to detect a write sequence targeting the
+ * GTTADR.  This code is developed by observed behavior and doesn't have a
+ * direct spec reference, unfortunately.
+ */
+static uint64_t vfio_igd_quirk_data_read(void *opaque,
+                                         hwaddr addr, unsigned size)
+{
+    VFIOIGDQuirk *igd = opaque;
+    VFIOPCIDevice *vdev = igd->vdev;
+
+    igd->index = ~0;
+
+    return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
+}
+
+static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
+                                      uint64_t data, unsigned size)
+{
+    VFIOIGDQuirk *igd = opaque;
+    VFIOPCIDevice *vdev = igd->vdev;
+    uint64_t val = data;
+    int gen = igd_gen(vdev);
+
+    /*
+     * Programming the GGMS starts at index 0x1 and uses every 4th index (ie.
+     * 0x1, 0x5, 0x9, 0xd,...).  For pre-Gen8 each 4-byte write is a whole PTE
+     * entry, with 0th bit enable set.  For Gen8 and up, PTEs are 64bit, so
+     * entries 0x5 & 0xd are the high dword, in our case zero.  Each PTE points
+     * to a 4k page, which we translate to a page from the VM allocated region,
+     * pointed to by the BDSM register.  If this is not set, we fail.
+     *
+     * We trap writes to the full configured GTT size, but we typically only
+     * see the vBIOS writing up to (nearly) the 1MB barrier.  In fact it often
+     * seems to miss the last entry for an even 1MB GTT.  Doing a gratuitous
+     * write of that last entry does work, but is hopefully unnecessary since
+     * we clear the previous GTT on initialization.
+     */
+    if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
+        if (gen < 8 || (igd->index % 8 == 1)) {
+            uint32_t base;
+
+            base = pci_get_long(vdev->pdev.config + IGD_BDSM);
+            if (!base) {
+                hw_error("vfio-igd: Guest attempted to program IGD GTT before "
+                         "BIOS reserved stolen memory.  Unsupported BIOS?");
+            }
+
+            val = base | (data & ((1 << 20) - 1));
+        } else {
+            val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */
+        }
+
+        trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
+                                      igd->index, data, val);
+    }
+
+    vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);
+
+    igd->index = ~0;
+}
+
+static const MemoryRegionOps vfio_igd_data_quirk = {
+    .read = vfio_igd_quirk_data_read,
+    .write = vfio_igd_quirk_data_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static uint64_t vfio_igd_quirk_index_read(void *opaque,
+                                          hwaddr addr, unsigned size)
+{
+    VFIOIGDQuirk *igd = opaque;
+    VFIOPCIDevice *vdev = igd->vdev;
+
+    igd->index = ~0;
+
+    return vfio_region_read(&vdev->bars[4].region, addr, size);
+}
+
+static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
+                                       uint64_t data, unsigned size)
+{
+    VFIOIGDQuirk *igd = opaque;
+    VFIOPCIDevice *vdev = igd->vdev;
+
+    igd->index = data;
+
+    vfio_region_write(&vdev->bars[4].region, addr, data, size);
+}
+
+static const MemoryRegionOps vfio_igd_index_quirk = {
+    .read = vfio_igd_quirk_index_read,
+    .write = vfio_igd_quirk_index_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    struct vfio_region_info *rom = NULL, *opregion = NULL,
+                            *host = NULL, *lpc = NULL;
+    VFIOQuirk *quirk;
+    VFIOIGDQuirk *igd;
+    PCIDevice *lpc_bridge;
+    int i, ret, ggms_mb, gms_mb = 0, gen;
+    uint64_t *bdsm_size;
+    uint32_t gmch;
+    uint16_t cmd_orig, cmd;
+
+    /*
+     * This must be an Intel VGA device at address 00:02.0 for us to even
+     * consider enabling legacy mode.  The vBIOS has dependencies on the
+     * PCI bus address.
+     */
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
+        !vfio_is_vga(vdev) || nr != 4 ||
+        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
+                                       0, PCI_DEVFN(0x2, 0))) {
+        return;
+    }
+
+    /*
+     * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
+     * can stuff host values into, so if there's already one there and it's not
+     * one we can hack on, legacy mode is no-go.  Sorry Q35.
+     */
+    lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
+                                 0, PCI_DEVFN(0x1f, 0));
+    if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
+                                           "vfio-pci-igd-lpc-bridge")) {
+        error_report("IGD device %s cannot support legacy mode due to existing "
+                     "devices at address 1f.0", vdev->vbasedev.name);
+        return;
+    }
+
+    /*
+     * IGD is not a standard, they like to change their specs often.  We
+     * only attempt to support back to SandBridge and we hope that newer
+     * devices maintain compatibility with generation 8.
+     */
+    gen = igd_gen(vdev);
+    if (gen != 6 && gen != 8) {
+        error_report("IGD device %s is unsupported in legacy mode, "
+                     "try SandyBridge or newer", vdev->vbasedev.name);
+        return;
+    }
+
+    /*
+     * Most of what we're doing here is to enable the ROM to run, so if
+     * there's no ROM, there's no point in setting up this quirk.
+     * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
+     */
+    ret = vfio_get_region_info(&vdev->vbasedev,
+                               VFIO_PCI_ROM_REGION_INDEX, &rom);
+    if ((ret || !rom->size) && !vdev->pdev.romfile) {
+        error_report("IGD device %s has no ROM, legacy mode disabled",
+                     vdev->vbasedev.name);
+        goto out;
+    }
+
+    /*
+     * Ignore the hotplug corner case, mark the ROM failed, we can't
+     * create the devices we need for legacy mode in the hotplug scenario.
+     */
+    if (vdev->pdev.qdev.hotplugged) {
+        error_report("IGD device %s hotplugged, ROM disabled, "
+                     "legacy mode disabled", vdev->vbasedev.name);
+        vdev->rom_read_failed = true;
+        goto out;
+    }
+
+    /*
+     * Check whether we have all the vfio device specific regions to
+     * support legacy mode (added in Linux v4.6).  If not, bail.
+     */
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
+    if (ret) {
+        error_report("IGD device %s does not support OpRegion access,"
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                        VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
+    if (ret) {
+        error_report("IGD device %s does not support host bridge access,"
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                        VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
+    if (ret) {
+        error_report("IGD device %s does not support LPC bridge access,"
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
+
+    /*
+     * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
+     * try to enable it.  Probably shouldn't be using legacy mode without VGA,
+     * but also no point in us enabling VGA if disabled in hardware.
+     */
+    if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev)) {
+        error_report("IGD device %s failed to enable VGA access, "
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    /* Create our LPC/ISA bridge */
+    ret = vfio_pci_igd_lpc_init(vdev, lpc);
+    if (ret) {
+        error_report("IGD device %s failed to create LPC bridge, "
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    /* Stuff some host values into the VM PCI host bridge */
+    ret = vfio_pci_igd_host_init(vdev, host);
+    if (ret) {
+        error_report("IGD device %s failed to modify host bridge, "
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    /* Setup OpRegion access */
+    ret = vfio_pci_igd_opregion_init(vdev, opregion);
+    if (ret) {
+        error_report("IGD device %s failed to setup OpRegion, "
+                     "legacy mode disabled", vdev->vbasedev.name);
+        goto out;
+    }
+
+    /* Setup our quirk to munge GTT addresses to the VM allocated buffer */
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->mem = g_new0(MemoryRegion, 2);
+    quirk->nr_mem = 2;
+    igd = quirk->data = g_malloc0(sizeof(*igd));
+    igd->vdev = vdev;
+    igd->index = ~0;
+
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
+                          igd, "vfio-igd-index-quirk", 4);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        0, &quirk->mem[0], 1);
+
+    memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
+                          igd, "vfio-igd-data-quirk", 4);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        4, &quirk->mem[1], 1);
+
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+    /* Determine the size of stolen memory needed for GTT */
+    ggms_mb = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
+    if (gen > 6) {
+        ggms_mb = 1 << ggms_mb;
+    }
+
+    /*
+     * Assume we have no GMS memory, but allow it to be overrided by device
+     * option (experimental).  The spec doesn't actually allow zero GMS when
+     * when IVD (IGD VGA Disable) is clear, but the claim is that it's unused,
+     * so let's not waste VM memory for it.
+     */
+    gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
+
+    if (vdev->igd_gms) {
+        if (vdev->igd_gms <= 0x10) {
+            gms_mb = vdev->igd_gms * 32;
+            gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);
+        } else {
+            error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
+            vdev->igd_gms = 0;
+        }
+    }
+
+    /*
+     * Request reserved memory for stolen memory via fw_cfg.  VM firmware
+     * must allocate a 1MB aligned reserved memory region below 4GB with
+     * the requested size (in bytes) for use by the Intel PCI class VGA
+     * device at VM address 00:02.0.  The base address of this reserved
+     * memory region must be written to the device BDSM regsiter at PCI
+     * config offset 0x5C.
+     */
+    bdsm_size = g_malloc(sizeof(*bdsm_size));
+    *bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * 1024 * 1024);
+    fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
+                    bdsm_size, sizeof(*bdsm_size));
+
+    /* GMCH is read-only, emulated */
+    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
+    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
+    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
+
+    /* BDSM is read-write, emulated.  The BIOS needs to be able to write it */
+    pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
+    pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
+    pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);
+
+    /*
+     * This IOBAR gives us access to GTTADR, which allows us to write to
+     * the GTT itself.  So let's go ahead and write zero to all the GTT
+     * entries to avoid spurious DMA faults.  Be sure I/O access is enabled
+     * before talking to the device.
+     */
+    if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
+              vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
+        error_report("IGD device %s - failed to read PCI command register",
+                     vdev->vbasedev.name);
+    }
+
+    cmd = cmd_orig | PCI_COMMAND_IO;
+
+    if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
+               vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
+        error_report("IGD device %s - failed to write PCI command register",
+                     vdev->vbasedev.name);
+    }
+
+    for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
+        vfio_region_write(&vdev->bars[4].region, 0, i, 4);
+        vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
+    }
+
+    if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
+               vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
+        error_report("IGD device %s - failed to restore PCI command register",
+                     vdev->vbasedev.name);
+    }
+
+    trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, ggms_mb + gms_mb);
+
+out:
+    g_free(rom);
+    g_free(opregion);
+    g_free(host);
+    g_free(lpc);
+}
+
+/*
  * Common quirk probe entry points.
  */
 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
@@ -1010,6 +1650,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
     vfio_probe_nvidia_bar5_quirk(vdev, nr);
     vfio_probe_nvidia_bar0_quirk(vdev, nr);
     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
+    vfio_probe_igd_bar4_quirk(vdev, nr);
 }
 
 void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d091d8cf0e..deab0c601a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1440,8 +1440,6 @@ static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr)
                      vdev->vbasedev.name, nr);
     }
 
-    vfio_bar_quirk_setup(vdev, nr);
-
     pci_register_bar(&vdev->pdev, nr, type, bar->region.mem);
 }
 
@@ -1452,29 +1450,6 @@ static void vfio_bars_setup(VFIOPCIDevice *vdev)
     for (i = 0; i < PCI_ROM_SLOT; i++) {
         vfio_bar_setup(vdev, i);
     }
-
-    if (vdev->vga) {
-        memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
-                              OBJECT(vdev), &vfio_vga_ops,
-                              &vdev->vga->region[QEMU_PCI_VGA_MEM],
-                              "vfio-vga-mmio@0xa0000",
-                              QEMU_PCI_VGA_MEM_SIZE);
-        memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
-                              OBJECT(vdev), &vfio_vga_ops,
-                              &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
-                              "vfio-vga-io@0x3b0",
-                              QEMU_PCI_VGA_IO_LO_SIZE);
-        memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
-                              OBJECT(vdev), &vfio_vga_ops,
-                              &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
-                              "vfio-vga-io@0x3c0",
-                              QEMU_PCI_VGA_IO_HI_SIZE);
-
-        pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
-                         &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
-                         &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
-        vfio_vga_quirk_setup(vdev);
-    }
 }
 
 static void vfio_bars_exit(VFIOPCIDevice *vdev)
@@ -2061,42 +2036,61 @@ int vfio_populate_vga(VFIOPCIDevice *vdev)
     struct vfio_region_info *reg_info;
     int ret;
 
-    if (vbasedev->num_regions > VFIO_PCI_VGA_REGION_INDEX) {
-        ret = vfio_get_region_info(vbasedev,
-                                   VFIO_PCI_VGA_REGION_INDEX, &reg_info);
-        if (ret) {
-            return ret;
-        }
+    ret = vfio_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, &reg_info);
+    if (ret) {
+        return ret;
+    }
 
-        if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
-            !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
-            reg_info->size < 0xbffff + 1) {
-            error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
-                         (unsigned long)reg_info->flags,
-                         (unsigned long)reg_info->size);
-            g_free(reg_info);
-            return -EINVAL;
-        }
+    if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) ||
+        !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) ||
+        reg_info->size < 0xbffff + 1) {
+        error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx",
+                     (unsigned long)reg_info->flags,
+                     (unsigned long)reg_info->size);
+        g_free(reg_info);
+        return -EINVAL;
+    }
 
-        vdev->vga = g_new0(VFIOVGA, 1);
+    vdev->vga = g_new0(VFIOVGA, 1);
 
-        vdev->vga->fd_offset = reg_info->offset;
-        vdev->vga->fd = vdev->vbasedev.fd;
+    vdev->vga->fd_offset = reg_info->offset;
+    vdev->vga->fd = vdev->vbasedev.fd;
 
-        g_free(reg_info);
+    g_free(reg_info);
 
-        vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
-        vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
-        QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
+    vdev->vga->region[QEMU_PCI_VGA_MEM].offset = QEMU_PCI_VGA_MEM_BASE;
+    vdev->vga->region[QEMU_PCI_VGA_MEM].nr = QEMU_PCI_VGA_MEM;
+    QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_MEM].quirks);
 
-        vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
-        vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
-        QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
+    memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+                          OBJECT(vdev), &vfio_vga_ops,
+                          &vdev->vga->region[QEMU_PCI_VGA_MEM],
+                          "vfio-vga-mmio@0xa0000",
+                          QEMU_PCI_VGA_MEM_SIZE);
 
-        vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
-        vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
-        QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
-    }
+    vdev->vga->region[QEMU_PCI_VGA_IO_LO].offset = QEMU_PCI_VGA_IO_LO_BASE;
+    vdev->vga->region[QEMU_PCI_VGA_IO_LO].nr = QEMU_PCI_VGA_IO_LO;
+    QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].quirks);
+
+    memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+                          OBJECT(vdev), &vfio_vga_ops,
+                          &vdev->vga->region[QEMU_PCI_VGA_IO_LO],
+                          "vfio-vga-io@0x3b0",
+                          QEMU_PCI_VGA_IO_LO_SIZE);
+
+    vdev->vga->region[QEMU_PCI_VGA_IO_HI].offset = QEMU_PCI_VGA_IO_HI_BASE;
+    vdev->vga->region[QEMU_PCI_VGA_IO_HI].nr = QEMU_PCI_VGA_IO_HI;
+    QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
+
+    memory_region_init_io(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
+                          OBJECT(vdev), &vfio_vga_ops,
+                          &vdev->vga->region[QEMU_PCI_VGA_IO_HI],
+                          "vfio-vga-io@0x3c0",
+                          QEMU_PCI_VGA_IO_HI_SIZE);
+
+    pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+                     &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+                     &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
 
     return 0;
 }
@@ -2398,7 +2392,7 @@ static int vfio_initfn(PCIDevice *pdev)
     ssize_t len;
     struct stat st;
     int groupid;
-    int ret;
+    int i, ret;
 
     if (!vdev->vbasedev.sysfsdev) {
         vdev->vbasedev.sysfsdev =
@@ -2560,6 +2554,43 @@ static int vfio_initfn(PCIDevice *pdev)
         goto out_teardown;
     }
 
+    if (vdev->vga) {
+        vfio_vga_quirk_setup(vdev);
+    }
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        vfio_bar_quirk_setup(vdev, i);
+    }
+
+    if (!vdev->igd_opregion &&
+        vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
+        struct vfio_region_info *opregion;
+
+        if (vdev->pdev.qdev.hotplugged) {
+            error_report("Cannot support IGD OpRegion feature on hotplugged "
+                         "device %s", vdev->vbasedev.name);
+            ret = -EINVAL;
+            goto out_teardown;
+        }
+
+        ret = vfio_get_dev_region_info(&vdev->vbasedev,
+                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
+                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
+        if (ret) {
+            error_report("Device %s does not support requested IGD OpRegion "
+                         "feature", vdev->vbasedev.name);
+            goto out_teardown;
+        }
+
+        ret = vfio_pci_igd_opregion_init(vdev, opregion);
+        g_free(opregion);
+        if (ret) {
+            error_report("Device %s IGD OpRegion initialization failed",
+                         vdev->vbasedev.name);
+            goto out_teardown;
+        }
+    }
+
     /* QEMU emulates all of MSI & MSIX */
     if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
         memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
@@ -2603,6 +2634,13 @@ static void vfio_instance_finalize(Object *obj)
     vfio_bars_finalize(vdev);
     g_free(vdev->emulated_config_bits);
     g_free(vdev->rom);
+    /*
+     * XXX Leaking igd_opregion is not an oversight, we can't remove the
+     * fw_cfg entry therefore leaking this allocation seems like the safest
+     * option.
+     *
+     * g_free(vdev->igd_opregion);
+     */
     vfio_put_device(vdev);
     vfio_put_group(group);
 }
@@ -2677,6 +2715,8 @@ static Property vfio_pci_dev_properties[] = {
                     VFIO_FEATURE_ENABLE_VGA_BIT, false),
     DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
                     VFIO_FEATURE_ENABLE_REQ_BIT, true),
+    DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
     DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
     DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
     DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
@@ -2687,6 +2727,7 @@ static Property vfio_pci_dev_properties[] = {
                        sub_vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                        sub_device_id, PCI_ANY_ID),
+    DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 3976f68549..b3eb0d838e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -115,6 +115,7 @@ typedef struct VFIOPCIDevice {
     int interrupt; /* Current interrupt type */
     VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
     VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
+    void *igd_opregion;
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
@@ -128,7 +129,11 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
+#define VFIO_FEATURE_ENABLE_IGD_OPREGION \
+                                (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
     int32_t bootindex;
+    uint32_t igd_gms;
     uint8_t pm_cap;
     bool has_vga;
     bool pci_aer;
@@ -159,4 +164,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
 
 int vfio_populate_vga(VFIOPCIDevice *vdev);
 
+int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
+                               struct vfio_region_info *info);
+
 #endif /* HW_VFIO_VFIO_PCI_H */
diff --git a/include/block/block.h b/include/block/block.h
index a8c15e36e7..70ea29947c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -17,7 +17,6 @@ typedef struct BlockJob BlockJob;
 typedef struct BdrvChild BdrvChild;
 typedef struct BdrvChildRole BdrvChildRole;
 typedef struct BlockJobTxn BlockJobTxn;
-typedef struct BdrvNextIterator BdrvNextIterator;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
@@ -198,7 +197,6 @@ BlockDriver *bdrv_find_format(const char *format_name);
 int bdrv_create(BlockDriver *drv, const char* filename,
                 QemuOpts *opts, Error **errp);
 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-BlockDriverState *bdrv_new_root(void);
 BlockDriverState *bdrv_new(void);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 void bdrv_replace_in_backing_chain(BlockDriverState *old,
@@ -214,8 +212,8 @@ BdrvChild *bdrv_open_child(const char *filename,
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                            const char *bdref_key, Error **errp);
-int bdrv_open(BlockDriverState **pbs, const char *filename,
-              const char *reference, QDict *options, int flags, Error **errp);
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+                            QDict *options, int flags, Error **errp);
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                     BlockDriverState *bs,
                                     QDict *options, int flags);
@@ -244,10 +242,6 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     const void *buf, int count);
 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov);
 /*
@@ -402,7 +396,19 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
                                  Error **errp);
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
 BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs);
+
+typedef struct BdrvNextIterator {
+    enum {
+        BDRV_NEXT_BACKEND_ROOTS,
+        BDRV_NEXT_MONITOR_OWNED,
+    } phase;
+    BlockBackend *blk;
+    BlockDriverState *bs;
+} BdrvNextIterator;
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it);
+BlockDriverState *bdrv_next(BdrvNextIterator *it);
+
 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
 int bdrv_is_encrypted(BlockDriverState *bs);
 int bdrv_key_required(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index b6f4755725..30a97178c8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -719,7 +719,8 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
 
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
-                                  const BdrvChildRole *child_role);
+                                  const BdrvChildRole *child_role,
+                                  void *opaque);
 void bdrv_root_unref_child(BdrvChild *child);
 
 const char *bdrv_get_parent_name(const BlockDriverState *bs);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 073a433cf8..86d28070b8 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -82,7 +82,7 @@ struct BlockJob {
     const BlockJobDriver *driver;
 
     /** The block device on which the job is operating.  */
-    BlockDriverState *bs;
+    BlockBackend *blk;
 
     /**
      * The ID of the block job. Currently the BlockBackend name of the BDS
@@ -135,6 +135,9 @@ struct BlockJob {
      */
     bool deferred_to_main_loop;
 
+    /** Element of the list of block jobs */
+    QLIST_ENTRY(BlockJob) job_list;
+
     /** Status that is published by the query-block-jobs QMP API */
     BlockDeviceIoStatus iostatus;
 
@@ -173,6 +176,17 @@ struct BlockJob {
 };
 
 /**
+ * block_job_next:
+ * @job: A block job, or %NULL.
+ *
+ * Get the next element from the list of block jobs after @job, or the
+ * first one if @job is %NULL.
+ *
+ * Returns the requested job, or %NULL if there are no more jobs left.
+ */
+BlockJob *block_job_next(BlockJob *job);
+
+/**
  * block_job_create:
  * @job_type: The class object for the newly-created job.
  * @bs: The block
@@ -357,6 +371,13 @@ bool block_job_is_paused(BlockJob *job);
 int block_job_cancel_sync(BlockJob *job);
 
 /**
+ * block_job_cancel_sync_all:
+ *
+ * Synchronously cancels all jobs using block_job_cancel_sync().
+ */
+void block_job_cancel_sync_all(void);
+
+/**
  * block_job_complete_sync:
  * @job: The job to be completed.
  * @errp: Error object which may be set by block_job_complete(); this is not
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index eb0e1b0342..0610377789 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -90,6 +90,7 @@ typedef struct VFIOContainer {
 typedef struct VFIOGuestIOMMU {
     VFIOContainer *container;
     MemoryRegion *iommu;
+    hwaddr iommu_offset;
     Notifier n;
     QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
 } VFIOGuestIOMMU;
@@ -154,5 +155,7 @@ extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
 #ifdef CONFIG_LINUX
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
                          struct vfio_region_info **info);
+int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
+                             uint32_t subtype, struct vfio_region_info **info);
 #endif
 #endif /* !HW_VFIO_VFIO_COMMON_H */
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9e36a97fc5..13b12b7e87 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -135,9 +135,12 @@ struct MigrationState
     QemuThread thread;
     QEMUBH *cleanup_bh;
     QEMUFile *to_dst_file;
-    int parameters[MIGRATION_PARAMETER__MAX];
+
+    /* New style params from 'migrate-set-parameters' */
+    MigrationParameters parameters;
 
     int state;
+    /* Old style params from 'migrate' command */
     MigrationParams params;
 
     /* State related to return path */
@@ -171,6 +174,9 @@ struct MigrationState
     QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
     /* The RAMBlock used in the last src_page_request */
     RAMBlock *last_req_rb;
+
+    /* The last error that occurred */
+    Error *error;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
@@ -179,6 +185,22 @@ void process_incoming_migration(QEMUFile *f);
 
 void qemu_start_incoming_migration(const char *uri, Error **errp);
 
+void migration_set_incoming_channel(MigrationState *s,
+                                    QIOChannel *ioc);
+
+void migration_tls_set_incoming_channel(MigrationState *s,
+                                        QIOChannel *ioc,
+                                        Error **errp);
+
+void migration_set_outgoing_channel(MigrationState *s,
+                                    QIOChannel *ioc,
+                                    const char *hostname);
+
+void migration_tls_set_outgoing_channel(MigrationState *s,
+                                        QIOChannel *ioc,
+                                        const char *hostname,
+                                        Error **errp);
+
 uint64_t migrate_max_downtime(void);
 
 void exec_start_incoming_migration(const char *host_port, Error **errp);
@@ -201,7 +223,7 @@ void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **
 
 void rdma_start_incoming_migration(const char *host_port, Error **errp);
 
-void migrate_fd_error(MigrationState *s);
+void migrate_fd_error(MigrationState *s, const Error *error);
 
 void migrate_fd_connect(MigrationState *s);
 
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 3f6b4ed581..2409a98967 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -23,16 +23,11 @@
  */
 #ifndef QEMU_FILE_H
 #define QEMU_FILE_H 1
+#include "qemu-common.h"
 #include "exec/cpu-common.h"
+#include "io/channel.h"
 
 
-/* This function writes a chunk of data to a file at the given position.
- * The pos argument can be ignored if the file is only being used for
- * streaming.  The handler should try to write all of the data it can.
- */
-typedef ssize_t (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
-                                        int64_t pos, size_t size);
-
 /* Read a chunk of data from a file at the given position.  The pos argument
  * can be ignored if the file is only be used for streaming.  The number of
  * bytes actually read should be returned.
@@ -53,8 +48,13 @@ typedef int (QEMUFileCloseFunc)(void *opaque);
  */
 typedef int (QEMUFileGetFD)(void *opaque);
 
+/* Called to change the blocking mode of the file
+ */
+typedef int (QEMUFileSetBlocking)(void *opaque, bool enabled);
+
 /*
- * This function writes an iovec to file.
+ * This function writes an iovec to file. The handler must write all
+ * of the data or return a negative errno value.
  */
 typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
                                            int iovcnt, int64_t pos);
@@ -101,32 +101,25 @@ typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
 typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr);
 
 typedef struct QEMUFileOps {
-    QEMUFilePutBufferFunc *put_buffer;
     QEMUFileGetBufferFunc *get_buffer;
     QEMUFileCloseFunc *close;
-    QEMUFileGetFD *get_fd;
+    QEMUFileSetBlocking *set_blocking;
     QEMUFileWritevBufferFunc *writev_buffer;
-    QEMURamHookFunc *before_ram_iterate;
-    QEMURamHookFunc *after_ram_iterate;
-    QEMURamHookFunc *hook_ram_load;
-    QEMURamSaveFunc *save_page;
     QEMURetPathFunc *get_return_path;
     QEMUFileShutdownFunc *shut_down;
 } QEMUFileOps;
 
-struct QEMUSizedBuffer {
-    struct iovec *iov;
-    size_t n_iov;
-    size_t size; /* total allocated size in all iov's */
-    size_t used; /* number of used bytes */
-};
+typedef struct QEMUFileHooks {
+    QEMURamHookFunc *before_ram_iterate;
+    QEMURamHookFunc *after_ram_iterate;
+    QEMURamHookFunc *hook_ram_load;
+    QEMURamSaveFunc *save_page;
+} QEMUFileHooks;
 
 QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
-QEMUFile *qemu_fopen(const char *filename, const char *mode);
-QEMUFile *qemu_fdopen(int fd, const char *mode);
-QEMUFile *qemu_fopen_socket(int fd, const char *mode);
-QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
-QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input);
+QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc);
+QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc);
+void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
 int qemu_get_fd(QEMUFile *f);
 int qemu_fclose(QEMUFile *f);
 int64_t qemu_ftell(QEMUFile *f);
@@ -141,20 +134,6 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size);
 bool qemu_file_mode_is_not_valid(const char *mode);
 bool qemu_file_is_writable(QEMUFile *f);
 
-QEMUSizedBuffer *qsb_create(const uint8_t *buffer, size_t len);
-void qsb_free(QEMUSizedBuffer *);
-size_t qsb_set_length(QEMUSizedBuffer *qsb, size_t length);
-size_t qsb_get_length(const QEMUSizedBuffer *qsb);
-ssize_t qsb_get_buffer(const QEMUSizedBuffer *, off_t start, size_t count,
-                       uint8_t *buf);
-ssize_t qsb_write_at(QEMUSizedBuffer *qsb, const uint8_t *buf,
-                     off_t pos, size_t count);
-
-
-/*
- * For use on files opened with qemu_bufopen
- */
-const QEMUSizedBuffer *qemu_buf_get(QEMUFile *f);
 
 static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
 {
diff --git a/include/qapi/error.h b/include/qapi/error.h
index 11be2327c0..0576659603 100644
--- a/include/qapi/error.h
+++ b/include/qapi/error.h
@@ -134,7 +134,7 @@ typedef enum ErrorClass {
 /*
  * Get @err's human-readable error message.
  */
-const char *error_get_pretty(Error *err);
+const char *error_get_pretty(const Error *err);
 
 /*
  * Get @err's error class.
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 1dcf6f5d53..b113fcf156 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -82,7 +82,6 @@ typedef struct QemuOpt QemuOpt;
 typedef struct QemuOpts QemuOpts;
 typedef struct QemuOptsList QemuOptsList;
 typedef struct QEMUSGList QEMUSGList;
-typedef struct QEMUSizedBuffer QEMUSizedBuffer;
 typedef struct QEMUTimer QEMUTimer;
 typedef struct QEMUTimerListGroup QEMUTimerListGroup;
 typedef struct QObject QObject;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 68d92b556e..c04af8ea46 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -78,8 +78,7 @@ typedef struct BlockBackendPublic {
     QLIST_ENTRY(BlockBackendPublic) round_robin;
 } BlockBackendPublic;
 
-BlockBackend *blk_new(Error **errp);
-BlockBackend *blk_new_with_bs(Error **errp);
+BlockBackend *blk_new(void);
 BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp);
 int blk_get_refcnt(BlockBackend *blk);
@@ -114,11 +113,17 @@ void *blk_get_attached_dev(BlockBackend *blk);
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                           int count);
-int blk_write_zeroes(BlockBackend *blk, int64_t offset,
-                     int count, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
-                                 int count, BdrvRequestFlags flags,
-                                 BlockCompletionFunc *cb, void *opaque);
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags);
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                      int count, BdrvRequestFlags flags);
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                  int count, BdrvRequestFlags flags,
+                                  BlockCompletionFunc *cb, void *opaque);
 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count);
 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
                BdrvRequestFlags flags);
@@ -196,8 +201,8 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk);
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                   BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
-                                     int count, BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                      int count, BdrvRequestFlags flags);
 int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
 int blk_truncate(BlockBackend *blk, int64_t offset);
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index d6e96a4298..34c8eaf64e 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -194,19 +194,19 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
 
-typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t offset,
-                              QEMUIOVector *iov, BdrvRequestFlags flags,
-                              BlockCompletionFunc *cb, void *opaque);
-
-BlockAIOCB *dma_blk_io(BlockBackend *blk,
-                       QEMUSGList *sg, uint64_t sector_num,
-                       DMAIOFunc *io_func, BlockCompletionFunc *cb,
-                       void *opaque, DMADirection dir);
+typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
+                              BlockCompletionFunc *cb, void *cb_opaque,
+                              void *opaque);
+
+BlockAIOCB *dma_blk_io(AioContext *ctx,
+                       QEMUSGList *sg, uint64_t offset,
+                       DMAIOFunc *io_func, void *io_func_opaque,
+                       BlockCompletionFunc *cb, void *opaque, DMADirection dir);
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
-                         QEMUSGList *sg, uint64_t sector,
+                         QEMUSGList *sg, uint64_t offset,
                          BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
-                          QEMUSGList *sg, uint64_t sector,
+                          QEMUSGList *sg, uint64_t offset,
                           BlockCompletionFunc *cb, void *opaque);
 uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg);
 uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index f9f00e2e56..f357ccde91 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -345,6 +345,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s);
 
 int kvm_arch_init_vcpu(CPUState *cpu);
 
+bool kvm_vcpu_id_is_valid(int vcpu_id);
+
 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
 unsigned long kvm_arch_vcpu_id(CPUState *cpu);
 
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 618169c4d5..94281413d0 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -119,7 +119,7 @@ void qemu_savevm_command_send(QEMUFile *f, enum qemu_vm_cmd command,
                               uint16_t len, uint8_t *data);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
-int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb);
+int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
 void qemu_savevm_send_postcopy_advise(QEMUFile *f);
 void qemu_savevm_send_postcopy_listen(QEMUFile *f);
 void qemu_savevm_send_postcopy_run(QEMUFile *f);
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index 3e5117bf28..43d795976d 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -140,6 +140,7 @@ static int qio_channel_buffer_close(QIOChannel *ioc,
     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
 
     g_free(bioc->data);
+    bioc->data = NULL;
     bioc->capacity = bioc->usage = bioc->offset = 0;
 
     return 0;
diff --git a/kvm-all.c b/kvm-all.c
index f9ae8f9bf8..e56f385278 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1459,6 +1459,12 @@ static int kvm_max_vcpus(KVMState *s)
     return (ret) ? ret : kvm_recommended_vcpus(s);
 }
 
+bool kvm_vcpu_id_is_valid(int vcpu_id)
+{
+    KVMState *s = KVM_STATE(current_machine->accelerator);
+    return vcpu_id >= 0 && vcpu_id < kvm_max_vcpus(s);
+}
+
 static int kvm_init(MachineState *ms)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index d25ff483eb..30ad945918 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -1,11 +1,12 @@
-common-obj-y += migration.o tcp.o
+common-obj-y += migration.o socket.o fd.o exec.o
+common-obj-y += tls.o
 common-obj-y += vmstate.o
-common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
+common-obj-y += qemu-file.o
+common-obj-y += qemu-file-channel.o
 common-obj-y += xbzrle.o postcopy-ram.o
 common-obj-y += qjson.o
 
 common-obj-$(CONFIG_RDMA) += rdma.o
-common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o
 
 common-obj-y += block.o
 
diff --git a/migration/block.c b/migration/block.c
index a7a76a0fb9..e0628d187f 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -383,7 +383,7 @@ static void init_blk_migration(QEMUFile *f)
     BlockDriverState *bs;
     BlkMigDevState *bmds;
     int64_t sectors;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
     block_mig_state.submitted = 0;
     block_mig_state.read_done = 0;
@@ -394,7 +394,7 @@ static void init_blk_migration(QEMUFile *f)
     block_mig_state.zero_blocks = migrate_zero_blocks();
 
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         if (bdrv_is_read_only(bs)) {
             continue;
         }
diff --git a/migration/exec.c b/migration/exec.c
index 559420969b..1515cc3319 100644
--- a/migration/exec.c
+++ b/migration/exec.c
@@ -3,10 +3,12 @@
  *
  * Copyright IBM, Corp. 2008
  * Copyright Dell MessageOne 2008
+ * Copyright Red Hat, Inc. 2015-2016
  *
  * Authors:
  *  Anthony Liguori   <aliguori@us.ibm.com>
  *  Charles Duffy     <charles_duffy@messageone.com>
+ *  Daniel P. Berrange <berrange@redhat.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -18,53 +20,53 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "qemu/sockets.h"
-#include "qemu/main-loop.h"
 #include "migration/migration.h"
-#include "migration/qemu-file.h"
-#include "block/block.h"
-#include <sys/wait.h>
+#include "io/channel-command.h"
+#include "trace.h"
 
-//#define DEBUG_MIGRATION_EXEC
-
-#ifdef DEBUG_MIGRATION_EXEC
-#define DPRINTF(fmt, ...) \
-    do { printf("migration-exec: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
 
 void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp)
 {
-    s->to_dst_file = qemu_popen_cmd(command, "w");
-    if (s->to_dst_file == NULL) {
-        error_setg_errno(errp, errno, "failed to popen the migration target");
+    QIOChannel *ioc;
+    const char *argv[] = { "/bin/sh", "-c", command, NULL };
+
+    trace_migration_exec_outgoing(command);
+    ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
+                                                    O_WRONLY,
+                                                    errp));
+    if (!ioc) {
         return;
     }
 
-    migrate_fd_connect(s);
+    migration_set_outgoing_channel(s, ioc, NULL);
+    object_unref(OBJECT(ioc));
 }
 
-static void exec_accept_incoming_migration(void *opaque)
+static gboolean exec_accept_incoming_migration(QIOChannel *ioc,
+                                               GIOCondition condition,
+                                               gpointer opaque)
 {
-    QEMUFile *f = opaque;
-
-    qemu_set_fd_handler(qemu_get_fd(f), NULL, NULL, NULL);
-    process_incoming_migration(f);
+    migration_set_incoming_channel(migrate_get_current(), ioc);
+    object_unref(OBJECT(ioc));
+    return FALSE; /* unregister */
 }
 
 void exec_start_incoming_migration(const char *command, Error **errp)
 {
-    QEMUFile *f;
+    QIOChannel *ioc;
+    const char *argv[] = { "/bin/sh", "-c", command, NULL };
 
-    DPRINTF("Attempting to start an incoming migration\n");
-    f = qemu_popen_cmd(command, "r");
-    if(f == NULL) {
-        error_setg_errno(errp, errno, "failed to popen the migration source");
+    trace_migration_exec_incoming(command);
+    ioc = QIO_CHANNEL(qio_channel_command_new_spawn(argv,
+                                                    O_RDONLY,
+                                                    errp));
+    if (!ioc) {
         return;
     }
 
-    qemu_set_fd_handler(qemu_get_fd(f), exec_accept_incoming_migration, NULL,
-                        f);
+    qio_channel_add_watch(ioc,
+                          G_IO_IN,
+                          exec_accept_incoming_migration,
+                          NULL,
+                          NULL);
 }
diff --git a/migration/fd.c b/migration/fd.c
index 3d788bb297..fc5c9eee02 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -1,10 +1,11 @@
 /*
  * QEMU live migration via generic fd
  *
- * Copyright Red Hat, Inc. 2009
+ * Copyright Red Hat, Inc. 2009-2016
  *
  * Authors:
  *  Chris Lalancette <clalance@redhat.com>
+ *  Daniel P. Berrange <berrange@redhat.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -16,75 +17,57 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "qemu/main-loop.h"
-#include "qemu/sockets.h"
 #include "migration/migration.h"
 #include "monitor/monitor.h"
-#include "migration/qemu-file.h"
-#include "block/block.h"
+#include "io/channel-util.h"
+#include "trace.h"
 
-//#define DEBUG_MIGRATION_FD
-
-#ifdef DEBUG_MIGRATION_FD
-#define DPRINTF(fmt, ...) \
-    do { printf("migration-fd: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
-static bool fd_is_socket(int fd)
-{
-    struct stat stat;
-    int ret = fstat(fd, &stat);
-    if (ret == -1) {
-        /* When in doubt say no */
-        return false;
-    }
-    return S_ISSOCK(stat.st_mode);
-}
 
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp)
 {
+    QIOChannel *ioc;
     int fd = monitor_get_fd(cur_mon, fdname, errp);
     if (fd == -1) {
         return;
     }
 
-    if (fd_is_socket(fd)) {
-        s->to_dst_file = qemu_fopen_socket(fd, "wb");
-    } else {
-        s->to_dst_file = qemu_fdopen(fd, "wb");
+    trace_migration_fd_outgoing(fd);
+    ioc = qio_channel_new_fd(fd, errp);
+    if (!ioc) {
+        close(fd);
+        return;
     }
 
-    migrate_fd_connect(s);
+    migration_set_outgoing_channel(s, ioc, NULL);
+    object_unref(OBJECT(ioc));
 }
 
-static void fd_accept_incoming_migration(void *opaque)
+static gboolean fd_accept_incoming_migration(QIOChannel *ioc,
+                                             GIOCondition condition,
+                                             gpointer opaque)
 {
-    QEMUFile *f = opaque;
-
-    qemu_set_fd_handler(qemu_get_fd(f), NULL, NULL, NULL);
-    process_incoming_migration(f);
+    migration_set_incoming_channel(migrate_get_current(), ioc);
+    object_unref(OBJECT(ioc));
+    return FALSE; /* unregister */
 }
 
 void fd_start_incoming_migration(const char *infd, Error **errp)
 {
+    QIOChannel *ioc;
     int fd;
-    QEMUFile *f;
-
-    DPRINTF("Attempting to start an incoming migration via fd\n");
 
     fd = strtol(infd, NULL, 0);
-    if (fd_is_socket(fd)) {
-        f = qemu_fopen_socket(fd, "rb");
-    } else {
-        f = qemu_fdopen(fd, "rb");
-    }
-    if(f == NULL) {
-        error_setg_errno(errp, errno, "failed to open the source descriptor");
+    trace_migration_fd_incoming(fd);
+
+    ioc = qio_channel_new_fd(fd, errp);
+    if (!ioc) {
+        close(fd);
         return;
     }
 
-    qemu_set_fd_handler(fd, fd_accept_incoming_migration, NULL, f);
+    qio_channel_add_watch(ioc,
+                          G_IO_IN,
+                          fd_accept_incoming_migration,
+                          NULL,
+                          NULL);
 }
diff --git a/migration/migration.c b/migration/migration.c
index f5327e8c0a..7ecbadee6f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -34,6 +34,8 @@
 #include "qom/cpu.h"
 #include "exec/memory.h"
 #include "exec/address-spaces.h"
+#include "io/channel-buffer.h"
+#include "io/channel-tls.h"
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
 
@@ -81,16 +83,13 @@ MigrationState *migrate_get_current(void)
         .bandwidth_limit = MAX_THROTTLE,
         .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
         .mbps = -1,
-        .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
-                DEFAULT_MIGRATE_COMPRESS_LEVEL,
-        .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
-                DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
-        .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
-                DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
-        .parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL] =
-                DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL,
-        .parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT] =
-                DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
+        .parameters = {
+            .compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL,
+            .compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
+            .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
+            .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL,
+            .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
+        },
     };
 
     if (!once) {
@@ -310,14 +309,12 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
     } else if (strstart(uri, "rdma:", &p)) {
         rdma_start_incoming_migration(p, errp);
 #endif
-#if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         exec_start_incoming_migration(p, errp);
     } else if (strstart(uri, "unix:", &p)) {
         unix_start_incoming_migration(p, errp);
     } else if (strstart(uri, "fd:", &p)) {
         fd_start_incoming_migration(p, errp);
-#endif
     } else {
         error_setg(errp, "unknown migration protocol: %s", uri);
     }
@@ -422,14 +419,60 @@ static void process_incoming_migration_co(void *opaque)
 void process_incoming_migration(QEMUFile *f)
 {
     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
-    int fd = qemu_get_fd(f);
 
-    assert(fd != -1);
     migrate_decompress_threads_create();
-    qemu_set_nonblock(fd);
+    qemu_file_set_blocking(f, false);
     qemu_coroutine_enter(co, f);
 }
 
+
+void migration_set_incoming_channel(MigrationState *s,
+                                    QIOChannel *ioc)
+{
+    trace_migration_set_incoming_channel(
+        ioc, object_get_typename(OBJECT(ioc)));
+
+    if (s->parameters.tls_creds &&
+        !object_dynamic_cast(OBJECT(ioc),
+                             TYPE_QIO_CHANNEL_TLS)) {
+        Error *local_err = NULL;
+        migration_tls_set_incoming_channel(s, ioc, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
+    } else {
+        QEMUFile *f = qemu_fopen_channel_input(ioc);
+        process_incoming_migration(f);
+    }
+}
+
+
+void migration_set_outgoing_channel(MigrationState *s,
+                                    QIOChannel *ioc,
+                                    const char *hostname)
+{
+    trace_migration_set_outgoing_channel(
+        ioc, object_get_typename(OBJECT(ioc)), hostname);
+
+    if (s->parameters.tls_creds &&
+        !object_dynamic_cast(OBJECT(ioc),
+                             TYPE_QIO_CHANNEL_TLS)) {
+        Error *local_err = NULL;
+        migration_tls_set_outgoing_channel(s, ioc, hostname, &local_err);
+        if (local_err) {
+            migrate_fd_error(s, local_err);
+            error_free(local_err);
+        }
+    } else {
+        QEMUFile *f = qemu_fopen_channel_output(ioc);
+
+        s->to_dst_file = f;
+
+        migrate_fd_connect(s);
+    }
+}
+
+
 /*
  * Send a message on the return channel back to the source
  * of the migration.
@@ -516,15 +559,13 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     MigrationState *s = migrate_get_current();
 
     params = g_malloc0(sizeof(*params));
-    params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
-    params->compress_threads =
-            s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
-    params->decompress_threads =
-            s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
-    params->cpu_throttle_initial =
-            s->parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL];
-    params->cpu_throttle_increment =
-            s->parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT];
+    params->compress_level = s->parameters.compress_level;
+    params->compress_threads = s->parameters.compress_threads;
+    params->decompress_threads = s->parameters.decompress_threads;
+    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
+    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
+    params->tls_creds = g_strdup(s->parameters.tls_creds);
+    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
 
     return params;
 }
@@ -672,6 +713,10 @@ MigrationInfo *qmp_query_migrate(Error **errp)
         break;
     case MIGRATION_STATUS_FAILED:
         info->has_status = true;
+        if (s->error) {
+            info->has_error_desc = true;
+            info->error_desc = g_strdup(error_get_pretty(s->error));
+        }
         break;
     case MIGRATION_STATUS_CANCELLED:
         info->has_status = true;
@@ -721,7 +766,12 @@ void qmp_migrate_set_parameters(bool has_compress_level,
                                 bool has_cpu_throttle_initial,
                                 int64_t cpu_throttle_initial,
                                 bool has_cpu_throttle_increment,
-                                int64_t cpu_throttle_increment, Error **errp)
+                                int64_t cpu_throttle_increment,
+                                bool has_tls_creds,
+                                const char *tls_creds,
+                                bool has_tls_hostname,
+                                const char *tls_hostname,
+                                Error **errp)
 {
     MigrationState *s = migrate_get_current();
 
@@ -758,26 +808,31 @@ void qmp_migrate_set_parameters(bool has_compress_level,
     }
 
     if (has_compress_level) {
-        s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
+        s->parameters.compress_level = compress_level;
     }
     if (has_compress_threads) {
-        s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
+        s->parameters.compress_threads = compress_threads;
     }
     if (has_decompress_threads) {
-        s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
-                                                    decompress_threads;
+        s->parameters.decompress_threads = decompress_threads;
     }
     if (has_cpu_throttle_initial) {
-        s->parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL] =
-                                                    cpu_throttle_initial;
+        s->parameters.cpu_throttle_initial = cpu_throttle_initial;
     }
-
     if (has_cpu_throttle_increment) {
-        s->parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT] =
-                                                    cpu_throttle_increment;
+        s->parameters.cpu_throttle_increment = cpu_throttle_increment;
+    }
+    if (has_tls_creds) {
+        g_free(s->parameters.tls_creds);
+        s->parameters.tls_creds = g_strdup(tls_creds);
+    }
+    if (has_tls_hostname) {
+        g_free(s->parameters.tls_hostname);
+        s->parameters.tls_hostname = g_strdup(tls_hostname);
     }
 }
 
+
 void qmp_migrate_start_postcopy(Error **errp)
 {
     MigrationState *s = migrate_get_current();
@@ -844,12 +899,15 @@ static void migrate_fd_cleanup(void *opaque)
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
-void migrate_fd_error(MigrationState *s)
+void migrate_fd_error(MigrationState *s, const Error *error)
 {
-    trace_migrate_fd_error();
+    trace_migrate_fd_error(error ? error_get_pretty(error) : "");
     assert(s->to_dst_file == NULL);
     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                       MIGRATION_STATUS_FAILED);
+    if (!s->error) {
+        s->error = error_copy(error);
+    }
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
@@ -948,6 +1006,8 @@ MigrationState *migrate_init(const MigrationParams *params)
     s->postcopy_after_devices = false;
     s->migration_thread_running = false;
     s->last_req_rb = NULL;
+    error_free(s->error);
+    s->error = NULL;
 
     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
 
@@ -1040,14 +1100,12 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     } else if (strstart(uri, "rdma:", &p)) {
         rdma_start_outgoing_migration(s, p, &local_err);
 #endif
-#if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         exec_start_outgoing_migration(s, p, &local_err);
     } else if (strstart(uri, "unix:", &p)) {
         unix_start_outgoing_migration(s, p, &local_err);
     } else if (strstart(uri, "fd:", &p)) {
         fd_start_outgoing_migration(s, p, &local_err);
-#endif
     } else {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                    "a valid migration protocol");
@@ -1057,7 +1115,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     }
 
     if (local_err) {
-        migrate_fd_error(s);
+        migrate_fd_error(s, local_err);
         error_propagate(errp, local_err);
         return;
     }
@@ -1170,7 +1228,7 @@ int migrate_compress_level(void)
 
     s = migrate_get_current();
 
-    return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
+    return s->parameters.compress_level;
 }
 
 int migrate_compress_threads(void)
@@ -1179,7 +1237,7 @@ int migrate_compress_threads(void)
 
     s = migrate_get_current();
 
-    return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
+    return s->parameters.compress_threads;
 }
 
 int migrate_decompress_threads(void)
@@ -1188,7 +1246,7 @@ int migrate_decompress_threads(void)
 
     s = migrate_get_current();
 
-    return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
+    return s->parameters.decompress_threads;
 }
 
 bool migrate_use_events(void)
@@ -1429,7 +1487,8 @@ static int await_return_path_close_on_source(MigrationState *ms)
 static int postcopy_start(MigrationState *ms, bool *old_vm_running)
 {
     int ret;
-    const QEMUSizedBuffer *qsb;
+    QIOChannelBuffer *bioc;
+    QEMUFile *fb;
     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
@@ -1488,11 +1547,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
      * So we wrap the device state up in a package with a length at the start;
      * to do this we use a qemu_buf to hold the whole of the device state.
      */
-    QEMUFile *fb = qemu_bufopen("w", NULL);
-    if (!fb) {
-        error_report("Failed to create buffered file");
-        goto fail;
-    }
+    bioc = qio_channel_buffer_new(4096);
+    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
+    object_unref(OBJECT(bioc));
 
     /*
      * Make sure the receiver can get incoming pages before we send the rest
@@ -1506,10 +1563,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
     qemu_savevm_send_postcopy_run(fb);
 
     /* <><> end of stuff going into the package */
-    qsb = qemu_buf_get(fb);
 
     /* Now send that blob */
-    if (qemu_savevm_send_packaged(ms->to_dst_file, qsb)) {
+    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
         goto fail_closefb;
     }
     qemu_fclose(fb);
@@ -1793,6 +1849,7 @@ void migrate_fd_connect(MigrationState *s)
     s->expected_downtime = max_downtime/1000000;
     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
 
+    qemu_file_set_blocking(s->to_dst_file, true);
     qemu_file_set_rate_limit(s->to_dst_file,
                              s->bandwidth_limit / XFER_LIMIT_RATIO);
 
diff --git a/migration/qemu-file-buf.c b/migration/qemu-file-buf.c
deleted file mode 100644
index 7b8e78e99c..0000000000
--- a/migration/qemu-file-buf.c
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2014 IBM Corp.
- *
- * Authors:
- *  Stefan Berger <stefanb@linux.vnet.ibm.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "qemu/iov.h"
-#include "qemu/sockets.h"
-#include "qemu/coroutine.h"
-#include "migration/migration.h"
-#include "migration/qemu-file.h"
-#include "migration/qemu-file-internal.h"
-#include "trace.h"
-
-#define QSB_CHUNK_SIZE      (1 << 10)
-#define QSB_MAX_CHUNK_SIZE  (16 * QSB_CHUNK_SIZE)
-
-/**
- * Create a QEMUSizedBuffer
- * This type of buffer uses scatter-gather lists internally and
- * can grow to any size. Any data array in the scatter-gather list
- * can hold different amount of bytes.
- *
- * @buffer: Optional buffer to copy into the QSB
- * @len: size of initial buffer; if @buffer is given, buffer must
- *       hold at least len bytes
- *
- * Returns a pointer to a QEMUSizedBuffer or NULL on allocation failure
- */
-QEMUSizedBuffer *qsb_create(const uint8_t *buffer, size_t len)
-{
-    QEMUSizedBuffer *qsb;
-    size_t alloc_len, num_chunks, i, to_copy;
-    size_t chunk_size = (len > QSB_MAX_CHUNK_SIZE)
-                        ? QSB_MAX_CHUNK_SIZE
-                        : QSB_CHUNK_SIZE;
-
-    num_chunks = DIV_ROUND_UP(len ? len : QSB_CHUNK_SIZE, chunk_size);
-    alloc_len = num_chunks * chunk_size;
-
-    qsb = g_try_new0(QEMUSizedBuffer, 1);
-    if (!qsb) {
-        return NULL;
-    }
-
-    qsb->iov = g_try_new0(struct iovec, num_chunks);
-    if (!qsb->iov) {
-        g_free(qsb);
-        return NULL;
-    }
-
-    qsb->n_iov = num_chunks;
-
-    for (i = 0; i < num_chunks; i++) {
-        qsb->iov[i].iov_base = g_try_malloc0(chunk_size);
-        if (!qsb->iov[i].iov_base) {
-            /* qsb_free is safe since g_free can cope with NULL */
-            qsb_free(qsb);
-            return NULL;
-        }
-
-        qsb->iov[i].iov_len = chunk_size;
-        if (buffer) {
-            to_copy = (len - qsb->used) > chunk_size
-                      ? chunk_size : (len - qsb->used);
-            memcpy(qsb->iov[i].iov_base, &buffer[qsb->used], to_copy);
-            qsb->used += to_copy;
-        }
-    }
-
-    qsb->size = alloc_len;
-
-    return qsb;
-}
-
-/**
- * Free the QEMUSizedBuffer
- *
- * @qsb: The QEMUSizedBuffer to free
- */
-void qsb_free(QEMUSizedBuffer *qsb)
-{
-    size_t i;
-
-    if (!qsb) {
-        return;
-    }
-
-    for (i = 0; i < qsb->n_iov; i++) {
-        g_free(qsb->iov[i].iov_base);
-    }
-    g_free(qsb->iov);
-    g_free(qsb);
-}
-
-/**
- * Get the number of used bytes in the QEMUSizedBuffer
- *
- * @qsb: A QEMUSizedBuffer
- *
- * Returns the number of bytes currently used in this buffer
- */
-size_t qsb_get_length(const QEMUSizedBuffer *qsb)
-{
-    return qsb->used;
-}
-
-/**
- * Set the length of the buffer; the primary usage of this
- * function is to truncate the number of used bytes in the buffer.
- * The size will not be extended beyond the current number of
- * allocated bytes in the QEMUSizedBuffer.
- *
- * @qsb: A QEMUSizedBuffer
- * @new_len: The new length of bytes in the buffer
- *
- * Returns the number of bytes the buffer was truncated or extended
- * to.
- */
-size_t qsb_set_length(QEMUSizedBuffer *qsb, size_t new_len)
-{
-    if (new_len <= qsb->size) {
-        qsb->used = new_len;
-    } else {
-        qsb->used = qsb->size;
-    }
-    return qsb->used;
-}
-
-/**
- * Get the iovec that holds the data for a given position @pos.
- *
- * @qsb: A QEMUSizedBuffer
- * @pos: The index of a byte in the buffer
- * @d_off: Pointer to an offset that this function will indicate
- *         at what position within the returned iovec the byte
- *         is to be found
- *
- * Returns the index of the iovec that holds the byte at the given
- * index @pos in the byte stream; a negative number if the iovec
- * for the given position @pos does not exist.
- */
-static ssize_t qsb_get_iovec(const QEMUSizedBuffer *qsb,
-                             off_t pos, off_t *d_off)
-{
-    ssize_t i;
-    off_t curr = 0;
-
-    if (pos > qsb->used) {
-        return -1;
-    }
-
-    for (i = 0; i < qsb->n_iov; i++) {
-        if (curr + qsb->iov[i].iov_len > pos) {
-            *d_off = pos - curr;
-            return i;
-        }
-        curr += qsb->iov[i].iov_len;
-    }
-    return -1;
-}
-
-/*
- * Convert the QEMUSizedBuffer into a flat buffer.
- *
- * Note: If at all possible, try to avoid this function since it
- *       may unnecessarily copy memory around.
- *
- * @qsb: pointer to QEMUSizedBuffer
- * @start: offset to start at
- * @count: number of bytes to copy
- * @buf: a pointer to a buffer to write into (at least @count bytes)
- *
- * Returns the number of bytes copied into the output buffer
- */
-ssize_t qsb_get_buffer(const QEMUSizedBuffer *qsb, off_t start,
-                       size_t count, uint8_t *buffer)
-{
-    const struct iovec *iov;
-    size_t to_copy, all_copy;
-    ssize_t index;
-    off_t s_off;
-    off_t d_off = 0;
-    char *s;
-
-    if (start > qsb->used) {
-        return 0;
-    }
-
-    all_copy = qsb->used - start;
-    if (all_copy > count) {
-        all_copy = count;
-    } else {
-        count = all_copy;
-    }
-
-    index = qsb_get_iovec(qsb, start, &s_off);
-    if (index < 0) {
-        return 0;
-    }
-
-    while (all_copy > 0) {
-        iov = &qsb->iov[index];
-
-        s = iov->iov_base;
-
-        to_copy = iov->iov_len - s_off;
-        if (to_copy > all_copy) {
-            to_copy = all_copy;
-        }
-        memcpy(&buffer[d_off], &s[s_off], to_copy);
-
-        d_off += to_copy;
-        all_copy -= to_copy;
-
-        s_off = 0;
-        index++;
-    }
-
-    return count;
-}
-
-/**
- * Grow the QEMUSizedBuffer to the given size and allocate
- * memory for it.
- *
- * @qsb: A QEMUSizedBuffer
- * @new_size: The new size of the buffer
- *
- * Return:
- *    a negative error code in case of memory allocation failure
- * or
- *    the new size of the buffer. The returned size may be greater or equal
- *    to @new_size.
- */
-static ssize_t qsb_grow(QEMUSizedBuffer *qsb, size_t new_size)
-{
-    size_t needed_chunks, i;
-
-    if (qsb->size < new_size) {
-        struct iovec *new_iov;
-        size_t size_diff = new_size - qsb->size;
-        size_t chunk_size = (size_diff > QSB_MAX_CHUNK_SIZE)
-                             ? QSB_MAX_CHUNK_SIZE : QSB_CHUNK_SIZE;
-
-        needed_chunks = DIV_ROUND_UP(size_diff, chunk_size);
-
-        new_iov = g_try_new(struct iovec, qsb->n_iov + needed_chunks);
-        if (new_iov == NULL) {
-            return -ENOMEM;
-        }
-
-        /* Allocate new chunks as needed into new_iov */
-        for (i = qsb->n_iov; i < qsb->n_iov + needed_chunks; i++) {
-            new_iov[i].iov_base = g_try_malloc0(chunk_size);
-            new_iov[i].iov_len = chunk_size;
-            if (!new_iov[i].iov_base) {
-                size_t j;
-
-                /* Free previously allocated new chunks */
-                for (j = qsb->n_iov; j < i; j++) {
-                    g_free(new_iov[j].iov_base);
-                }
-                g_free(new_iov);
-
-                return -ENOMEM;
-            }
-        }
-
-        /*
-         * Now we can't get any allocation errors, copy over to new iov
-         * and switch.
-         */
-        for (i = 0; i < qsb->n_iov; i++) {
-            new_iov[i] = qsb->iov[i];
-        }
-
-        qsb->n_iov += needed_chunks;
-        g_free(qsb->iov);
-        qsb->iov = new_iov;
-        qsb->size += (needed_chunks * chunk_size);
-    }
-
-    return qsb->size;
-}
-
-/**
- * Write into the QEMUSizedBuffer at a given position and a given
- * number of bytes. This function will automatically grow the
- * QEMUSizedBuffer.
- *
- * @qsb: A QEMUSizedBuffer
- * @source: A byte array to copy data from
- * @pos: The position within the @qsb to write data to
- * @size: The number of bytes to copy into the @qsb
- *
- * Returns @size or a negative error code in case of memory allocation failure,
- *           or with an invalid 'pos'
- */
-ssize_t qsb_write_at(QEMUSizedBuffer *qsb, const uint8_t *source,
-                     off_t pos, size_t count)
-{
-    ssize_t rc = qsb_grow(qsb, pos + count);
-    size_t to_copy;
-    size_t all_copy = count;
-    const struct iovec *iov;
-    ssize_t index;
-    char *dest;
-    off_t d_off, s_off = 0;
-
-    if (rc < 0) {
-        return rc;
-    }
-
-    if (pos + count > qsb->used) {
-        qsb->used = pos + count;
-    }
-
-    index = qsb_get_iovec(qsb, pos, &d_off);
-    if (index < 0) {
-        return -EINVAL;
-    }
-
-    while (all_copy > 0) {
-        iov = &qsb->iov[index];
-
-        dest = iov->iov_base;
-
-        to_copy = iov->iov_len - d_off;
-        if (to_copy > all_copy) {
-            to_copy = all_copy;
-        }
-
-        memcpy(&dest[d_off], &source[s_off], to_copy);
-
-        s_off += to_copy;
-        all_copy -= to_copy;
-
-        d_off = 0;
-        index++;
-    }
-
-    return count;
-}
-
-typedef struct QEMUBuffer {
-    QEMUSizedBuffer *qsb;
-    QEMUFile *file;
-    bool qsb_allocated;
-} QEMUBuffer;
-
-static ssize_t buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-                              size_t size)
-{
-    QEMUBuffer *s = opaque;
-    ssize_t len = qsb_get_length(s->qsb) - pos;
-
-    if (len <= 0) {
-        return 0;
-    }
-
-    if (len > size) {
-        len = size;
-    }
-    return qsb_get_buffer(s->qsb, pos, len, buf);
-}
-
-static ssize_t buf_put_buffer(void *opaque, const uint8_t *buf,
-                              int64_t pos, size_t size)
-{
-    QEMUBuffer *s = opaque;
-
-    return qsb_write_at(s->qsb, buf, pos, size);
-}
-
-static int buf_close(void *opaque)
-{
-    QEMUBuffer *s = opaque;
-
-    if (s->qsb_allocated) {
-        qsb_free(s->qsb);
-    }
-
-    g_free(s);
-
-    return 0;
-}
-
-const QEMUSizedBuffer *qemu_buf_get(QEMUFile *f)
-{
-    QEMUBuffer *p;
-
-    qemu_fflush(f);
-
-    p = f->opaque;
-
-    return p->qsb;
-}
-
-static const QEMUFileOps buf_read_ops = {
-    .get_buffer = buf_get_buffer,
-    .close =      buf_close,
-};
-
-static const QEMUFileOps buf_write_ops = {
-    .put_buffer = buf_put_buffer,
-    .close =      buf_close,
-};
-
-QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input)
-{
-    QEMUBuffer *s;
-
-    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') ||
-        mode[1] != '\0') {
-        error_report("qemu_bufopen: Argument validity check failed");
-        return NULL;
-    }
-
-    s = g_new0(QEMUBuffer, 1);
-    s->qsb = input;
-
-    if (s->qsb == NULL) {
-        s->qsb = qsb_create(NULL, 0);
-        s->qsb_allocated = true;
-    }
-    if (!s->qsb) {
-        g_free(s);
-        error_report("qemu_bufopen: qsb_create failed");
-        return NULL;
-    }
-
-
-    if (mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, &buf_read_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &buf_write_ops);
-    }
-    return s->file;
-}
diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
new file mode 100644
index 0000000000..45c13f1028
--- /dev/null
+++ b/migration/qemu-file-channel.c
@@ -0,0 +1,180 @@
+/*
+ * QEMUFile backend for QIOChannel objects
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "migration/qemu-file.h"
+#include "io/channel-socket.h"
+#include "qemu/iov.h"
+
+
+static ssize_t channel_writev_buffer(void *opaque,
+                                     struct iovec *iov,
+                                     int iovcnt,
+                                     int64_t pos)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+    ssize_t done = 0;
+    struct iovec *local_iov = g_new(struct iovec, iovcnt);
+    struct iovec *local_iov_head = local_iov;
+    unsigned int nlocal_iov = iovcnt;
+
+    nlocal_iov = iov_copy(local_iov, nlocal_iov,
+                          iov, iovcnt,
+                          0, iov_size(iov, iovcnt));
+
+    while (nlocal_iov > 0) {
+        ssize_t len;
+        len = qio_channel_writev(ioc, local_iov, nlocal_iov, NULL);
+        if (len == QIO_CHANNEL_ERR_BLOCK) {
+            qio_channel_wait(ioc, G_IO_OUT);
+            continue;
+        }
+        if (len < 0) {
+            /* XXX handle Error objects */
+            done = -EIO;
+            goto cleanup;
+        }
+
+        iov_discard_front(&local_iov, &nlocal_iov, len);
+        done += len;
+    }
+
+ cleanup:
+    g_free(local_iov_head);
+    return done;
+}
+
+
+static ssize_t channel_get_buffer(void *opaque,
+                                  uint8_t *buf,
+                                  int64_t pos,
+                                  size_t size)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+    ssize_t ret;
+
+    do {
+        ret = qio_channel_read(ioc, (char *)buf, size, NULL);
+        if (ret < 0) {
+            if (ret == QIO_CHANNEL_ERR_BLOCK) {
+                qio_channel_yield(ioc, G_IO_IN);
+            } else {
+                /* XXX handle Error * object */
+                return -EIO;
+            }
+        }
+    } while (ret == QIO_CHANNEL_ERR_BLOCK);
+
+    return ret;
+}
+
+
+static int channel_close(void *opaque)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+    qio_channel_close(ioc, NULL);
+    object_unref(OBJECT(ioc));
+    return 0;
+}
+
+
+static int channel_shutdown(void *opaque,
+                            bool rd,
+                            bool wr)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+
+    if (qio_channel_has_feature(ioc,
+                                QIO_CHANNEL_FEATURE_SHUTDOWN)) {
+        QIOChannelShutdown mode;
+        if (rd && wr) {
+            mode = QIO_CHANNEL_SHUTDOWN_BOTH;
+        } else if (rd) {
+            mode = QIO_CHANNEL_SHUTDOWN_READ;
+        } else {
+            mode = QIO_CHANNEL_SHUTDOWN_WRITE;
+        }
+        if (qio_channel_shutdown(ioc, mode, NULL) < 0) {
+            /* XXX handler Error * object */
+            return -EIO;
+        }
+    }
+    return 0;
+}
+
+
+static int channel_set_blocking(void *opaque,
+                                bool enabled)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+
+    if (qio_channel_set_blocking(ioc, enabled, NULL) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+static QEMUFile *channel_get_input_return_path(void *opaque)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+
+    return qemu_fopen_channel_output(ioc);
+}
+
+static QEMUFile *channel_get_output_return_path(void *opaque)
+{
+    QIOChannel *ioc = QIO_CHANNEL(opaque);
+
+    return qemu_fopen_channel_input(ioc);
+}
+
+static const QEMUFileOps channel_input_ops = {
+    .get_buffer = channel_get_buffer,
+    .close = channel_close,
+    .shut_down = channel_shutdown,
+    .set_blocking = channel_set_blocking,
+    .get_return_path = channel_get_input_return_path,
+};
+
+
+static const QEMUFileOps channel_output_ops = {
+    .writev_buffer = channel_writev_buffer,
+    .close = channel_close,
+    .shut_down = channel_shutdown,
+    .set_blocking = channel_set_blocking,
+    .get_return_path = channel_get_output_return_path,
+};
+
+
+QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc)
+{
+    object_ref(OBJECT(ioc));
+    return qemu_fopen_ops(ioc, &channel_input_ops);
+}
+
+QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc)
+{
+    object_ref(OBJECT(ioc));
+    return qemu_fopen_ops(ioc, &channel_output_ops);
+}
diff --git a/migration/qemu-file-internal.h b/migration/qemu-file-internal.h
deleted file mode 100644
index d95e8538e7..0000000000
--- a/migration/qemu-file-internal.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef QEMU_FILE_INTERNAL_H
-#define QEMU_FILE_INTERNAL_H 1
-
-#include "qemu-common.h"
-#include "qemu/iov.h"
-
-#define IO_BUF_SIZE 32768
-#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
-
-struct QEMUFile {
-    const QEMUFileOps *ops;
-    void *opaque;
-
-    int64_t bytes_xfer;
-    int64_t xfer_limit;
-
-    int64_t pos; /* start of buffer when writing, end of buffer
-                    when reading */
-    int buf_index;
-    int buf_size; /* 0 when writing */
-    uint8_t buf[IO_BUF_SIZE];
-
-    struct iovec iov[MAX_IOV_SIZE];
-    unsigned int iovcnt;
-
-    int last_error;
-};
-
-#endif
diff --git a/migration/qemu-file-stdio.c b/migration/qemu-file-stdio.c
deleted file mode 100644
index f402e8f708..0000000000
--- a/migration/qemu-file-stdio.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/coroutine.h"
-#include "migration/qemu-file.h"
-
-typedef struct QEMUFileStdio {
-    FILE *stdio_file;
-    QEMUFile *file;
-} QEMUFileStdio;
-
-static int stdio_get_fd(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-
-    return fileno(s->stdio_file);
-}
-
-static ssize_t stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos,
-                                size_t size)
-{
-    QEMUFileStdio *s = opaque;
-    size_t res;
-
-    res = fwrite(buf, 1, size, s->stdio_file);
-
-    if (res != size) {
-        return -errno;
-    }
-    return res;
-}
-
-static ssize_t stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-                                size_t size)
-{
-    QEMUFileStdio *s = opaque;
-    FILE *fp = s->stdio_file;
-    ssize_t bytes;
-
-    for (;;) {
-        clearerr(fp);
-        bytes = fread(buf, 1, size, fp);
-        if (bytes != 0 || !ferror(fp)) {
-            break;
-        }
-        if (errno == EAGAIN) {
-            yield_until_fd_readable(fileno(fp));
-        } else if (errno != EINTR) {
-            break;
-        }
-    }
-    return bytes;
-}
-
-static int stdio_pclose(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-    int ret;
-    ret = pclose(s->stdio_file);
-    if (ret == -1) {
-        ret = -errno;
-    } else if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) {
-        /* close succeeded, but non-zero exit code: */
-        ret = -EIO; /* fake errno value */
-    }
-    g_free(s);
-    return ret;
-}
-
-static int stdio_fclose(void *opaque)
-{
-    QEMUFileStdio *s = opaque;
-    int ret = 0;
-
-    if (qemu_file_is_writable(s->file)) {
-        int fd = fileno(s->stdio_file);
-        struct stat st;
-
-        ret = fstat(fd, &st);
-        if (ret == 0 && S_ISREG(st.st_mode)) {
-            /*
-             * If the file handle is a regular file make sure the
-             * data is flushed to disk before signaling success.
-             */
-            ret = fsync(fd);
-            if (ret != 0) {
-                ret = -errno;
-                return ret;
-            }
-        }
-    }
-    if (fclose(s->stdio_file) == EOF) {
-        ret = -errno;
-    }
-    g_free(s);
-    return ret;
-}
-
-static const QEMUFileOps stdio_pipe_read_ops = {
-    .get_fd =     stdio_get_fd,
-    .get_buffer = stdio_get_buffer,
-    .close =      stdio_pclose
-};
-
-static const QEMUFileOps stdio_pipe_write_ops = {
-    .get_fd =     stdio_get_fd,
-    .put_buffer = stdio_put_buffer,
-    .close =      stdio_pclose
-};
-
-QEMUFile *qemu_popen_cmd(const char *command, const char *mode)
-{
-    FILE *stdio_file;
-    QEMUFileStdio *s;
-
-    if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) {
-        fprintf(stderr, "qemu_popen: Argument validity check failed\n");
-        return NULL;
-    }
-
-    stdio_file = popen(command, mode);
-    if (stdio_file == NULL) {
-        return NULL;
-    }
-
-    s = g_new0(QEMUFileStdio, 1);
-
-    s->stdio_file = stdio_file;
-
-    if (mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, &stdio_pipe_read_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &stdio_pipe_write_ops);
-    }
-    return s->file;
-}
-
-static const QEMUFileOps stdio_file_read_ops = {
-    .get_fd =     stdio_get_fd,
-    .get_buffer = stdio_get_buffer,
-    .close =      stdio_fclose
-};
-
-static const QEMUFileOps stdio_file_write_ops = {
-    .get_fd =     stdio_get_fd,
-    .put_buffer = stdio_put_buffer,
-    .close =      stdio_fclose
-};
-
-QEMUFile *qemu_fopen(const char *filename, const char *mode)
-{
-    QEMUFileStdio *s;
-
-    if (qemu_file_mode_is_not_valid(mode)) {
-        return NULL;
-    }
-
-    s = g_new0(QEMUFileStdio, 1);
-
-    s->stdio_file = fopen(filename, mode);
-    if (!s->stdio_file) {
-        goto fail;
-    }
-
-    if (mode[0] == 'w') {
-        s->file = qemu_fopen_ops(s, &stdio_file_write_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &stdio_file_read_ops);
-    }
-    return s->file;
-fail:
-    g_free(s);
-    return NULL;
-}
diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c
deleted file mode 100644
index 4474e18ff8..0000000000
--- a/migration/qemu-file-unix.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "qemu/iov.h"
-#include "qemu/sockets.h"
-#include "qemu/coroutine.h"
-#include "migration/qemu-file.h"
-#include "migration/qemu-file-internal.h"
-
-typedef struct QEMUFileSocket {
-    int fd;
-    QEMUFile *file;
-} QEMUFileSocket;
-
-static ssize_t socket_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
-                                    int64_t pos)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len;
-    ssize_t size = iov_size(iov, iovcnt);
-    ssize_t offset = 0;
-    int     err;
-
-    while (size > 0) {
-        len = iov_send(s->fd, iov, iovcnt, offset, size);
-
-        if (len > 0) {
-            size -= len;
-            offset += len;
-        }
-
-        if (size > 0) {
-            if (errno != EAGAIN && errno != EWOULDBLOCK) {
-                error_report("socket_writev_buffer: Got err=%d for (%zu/%zu)",
-                             errno, (size_t)size, (size_t)len);
-                /*
-                 * If I've already sent some but only just got the error, I
-                 * could return the amount validly sent so far and wait for the
-                 * next call to report the error, but I'd rather flag the error
-                 * immediately.
-                 */
-                return -errno;
-            }
-
-            /* Emulate blocking */
-            GPollFD pfd;
-
-            pfd.fd = s->fd;
-            pfd.events = G_IO_OUT | G_IO_ERR;
-            pfd.revents = 0;
-            TFR(err = g_poll(&pfd, 1, -1 /* no timeout */));
-            /* Errors other than EINTR intentionally ignored */
-        }
-     }
-
-    return offset;
-}
-
-static int socket_get_fd(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-
-    return s->fd;
-}
-
-static ssize_t socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-                                 size_t size)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len;
-
-    for (;;) {
-        len = qemu_recv(s->fd, buf, size, 0);
-        if (len != -1) {
-            break;
-        }
-        if (errno == EAGAIN) {
-            yield_until_fd_readable(s->fd);
-        } else if (errno != EINTR) {
-            break;
-        }
-    }
-
-    if (len == -1) {
-        len = -errno;
-    }
-    return len;
-}
-
-static int socket_close(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-    closesocket(s->fd);
-    g_free(s);
-    return 0;
-}
-
-static int socket_shutdown(void *opaque, bool rd, bool wr)
-{
-    QEMUFileSocket *s = opaque;
-
-    if (shutdown(s->fd, rd ? (wr ? SHUT_RDWR : SHUT_RD) : SHUT_WR)) {
-        return -errno;
-    } else {
-        return 0;
-    }
-}
-
-static int socket_return_close(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-    /*
-     * Note: We don't close the socket, that should be done by the forward
-     * path.
-     */
-    g_free(s);
-    return 0;
-}
-
-static const QEMUFileOps socket_return_read_ops = {
-    .get_fd          = socket_get_fd,
-    .get_buffer      = socket_get_buffer,
-    .close           = socket_return_close,
-    .shut_down       = socket_shutdown,
-};
-
-static const QEMUFileOps socket_return_write_ops = {
-    .get_fd          = socket_get_fd,
-    .writev_buffer   = socket_writev_buffer,
-    .close           = socket_return_close,
-    .shut_down       = socket_shutdown,
-};
-
-/*
- * Give a QEMUFile* off the same socket but data in the opposite
- * direction.
- */
-static QEMUFile *socket_get_return_path(void *opaque)
-{
-    QEMUFileSocket *forward = opaque;
-    QEMUFileSocket *reverse;
-
-    if (qemu_file_get_error(forward->file)) {
-        /* If the forward file is in error, don't try and open a return */
-        return NULL;
-    }
-
-    reverse = g_malloc0(sizeof(QEMUFileSocket));
-    reverse->fd = forward->fd;
-    /* I don't think there's a better way to tell which direction 'this' is */
-    if (forward->file->ops->get_buffer != NULL) {
-        /* being called from the read side, so we need to be able to write */
-        return qemu_fopen_ops(reverse, &socket_return_write_ops);
-    } else {
-        return qemu_fopen_ops(reverse, &socket_return_read_ops);
-    }
-}
-
-static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
-                                  int64_t pos)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len, offset;
-    ssize_t size = iov_size(iov, iovcnt);
-    ssize_t total = 0;
-
-    assert(iovcnt > 0);
-    offset = 0;
-    while (size > 0) {
-        /* Find the next start position; skip all full-sized vector elements  */
-        while (offset >= iov[0].iov_len) {
-            offset -= iov[0].iov_len;
-            iov++, iovcnt--;
-        }
-
-        /* skip `offset' bytes from the (now) first element, undo it on exit */
-        assert(iovcnt > 0);
-        iov[0].iov_base += offset;
-        iov[0].iov_len -= offset;
-
-        do {
-            len = writev(s->fd, iov, iovcnt);
-        } while (len == -1 && errno == EINTR);
-        if (len == -1) {
-            return -errno;
-        }
-
-        /* Undo the changes above */
-        iov[0].iov_base -= offset;
-        iov[0].iov_len += offset;
-
-        /* Prepare for the next iteration */
-        offset += len;
-        total += len;
-        size -= len;
-    }
-
-    return total;
-}
-
-static ssize_t unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-                              size_t size)
-{
-    QEMUFileSocket *s = opaque;
-    ssize_t len;
-
-    for (;;) {
-        len = read(s->fd, buf, size);
-        if (len != -1) {
-            break;
-        }
-        if (errno == EAGAIN) {
-            yield_until_fd_readable(s->fd);
-        } else if (errno != EINTR) {
-            break;
-        }
-    }
-
-    if (len == -1) {
-        len = -errno;
-    }
-    return len;
-}
-
-static int unix_close(void *opaque)
-{
-    QEMUFileSocket *s = opaque;
-    close(s->fd);
-    g_free(s);
-    return 0;
-}
-
-static const QEMUFileOps unix_read_ops = {
-    .get_fd =     socket_get_fd,
-    .get_buffer = unix_get_buffer,
-    .close =      unix_close
-};
-
-static const QEMUFileOps unix_write_ops = {
-    .get_fd =     socket_get_fd,
-    .writev_buffer = unix_writev_buffer,
-    .close =      unix_close
-};
-
-QEMUFile *qemu_fdopen(int fd, const char *mode)
-{
-    QEMUFileSocket *s;
-
-    if (mode == NULL ||
-        (mode[0] != 'r' && mode[0] != 'w') ||
-        mode[1] != 'b' || mode[2] != 0) {
-        fprintf(stderr, "qemu_fdopen: Argument validity check failed\n");
-        return NULL;
-    }
-
-    s = g_new0(QEMUFileSocket, 1);
-    s->fd = fd;
-
-    if (mode[0] == 'r') {
-        s->file = qemu_fopen_ops(s, &unix_read_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &unix_write_ops);
-    }
-    return s->file;
-}
-
-static const QEMUFileOps socket_read_ops = {
-    .get_fd          = socket_get_fd,
-    .get_buffer      = socket_get_buffer,
-    .close           = socket_close,
-    .shut_down       = socket_shutdown,
-    .get_return_path = socket_get_return_path
-};
-
-static const QEMUFileOps socket_write_ops = {
-    .get_fd          = socket_get_fd,
-    .writev_buffer   = socket_writev_buffer,
-    .close           = socket_close,
-    .shut_down       = socket_shutdown,
-    .get_return_path = socket_get_return_path
-};
-
-QEMUFile *qemu_fopen_socket(int fd, const char *mode)
-{
-    QEMUFileSocket *s;
-
-    if (qemu_file_mode_is_not_valid(mode)) {
-        return NULL;
-    }
-
-    s = g_new0(QEMUFileSocket, 1);
-    s->fd = fd;
-    if (mode[0] == 'w') {
-        qemu_set_block(s->fd);
-        s->file = qemu_fopen_ops(s, &socket_write_ops);
-    } else {
-        s->file = qemu_fopen_ops(s, &socket_read_ops);
-    }
-    return s->file;
-}
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 6f4a1299b3..8aea1c7094 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,9 +30,31 @@
 #include "qemu/coroutine.h"
 #include "migration/migration.h"
 #include "migration/qemu-file.h"
-#include "migration/qemu-file-internal.h"
 #include "trace.h"
 
+#define IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE MIN(IOV_MAX, 64)
+
+struct QEMUFile {
+    const QEMUFileOps *ops;
+    const QEMUFileHooks *hooks;
+    void *opaque;
+
+    int64_t bytes_xfer;
+    int64_t xfer_limit;
+
+    int64_t pos; /* start of buffer when writing, end of buffer
+                    when reading */
+    int buf_index;
+    int buf_size; /* 0 when writing */
+    uint8_t buf[IO_BUF_SIZE];
+
+    struct iovec iov[MAX_IOV_SIZE];
+    unsigned int iovcnt;
+
+    int last_error;
+};
+
 /*
  * Stop a file from being read/written - not all backing files can do this
  * typically only sockets can.
@@ -80,6 +102,12 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops)
     return f;
 }
 
+
+void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
+{
+    f->hooks = hooks;
+}
+
 /*
  * Get last error for stream f
  *
@@ -101,48 +129,49 @@ void qemu_file_set_error(QEMUFile *f, int ret)
 
 bool qemu_file_is_writable(QEMUFile *f)
 {
-    return f->ops->writev_buffer || f->ops->put_buffer;
+    return f->ops->writev_buffer;
 }
 
 /**
  * Flushes QEMUFile buffer
  *
  * If there is writev_buffer QEMUFileOps it uses it otherwise uses
- * put_buffer ops.
+ * put_buffer ops. This will flush all pending data. If data was
+ * only partially flushed, it will set an error state.
  */
 void qemu_fflush(QEMUFile *f)
 {
     ssize_t ret = 0;
+    ssize_t expect = 0;
 
     if (!qemu_file_is_writable(f)) {
         return;
     }
 
-    if (f->ops->writev_buffer) {
-        if (f->iovcnt > 0) {
-            ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
-        }
-    } else {
-        if (f->buf_index > 0) {
-            ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index);
-        }
+    if (f->iovcnt > 0) {
+        expect = iov_size(f->iov, f->iovcnt);
+        ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos);
     }
+
     if (ret >= 0) {
         f->pos += ret;
     }
+    /* We expect the QEMUFile write impl to send the full
+     * data set we requested, so sanity check that.
+     */
+    if (ret != expect) {
+        qemu_file_set_error(f, ret < 0 ? ret : -EIO);
+    }
     f->buf_index = 0;
     f->iovcnt = 0;
-    if (ret < 0) {
-        qemu_file_set_error(f, ret);
-    }
 }
 
 void ram_control_before_iterate(QEMUFile *f, uint64_t flags)
 {
     int ret = 0;
 
-    if (f->ops->before_ram_iterate) {
-        ret = f->ops->before_ram_iterate(f, f->opaque, flags, NULL);
+    if (f->hooks && f->hooks->before_ram_iterate) {
+        ret = f->hooks->before_ram_iterate(f, f->opaque, flags, NULL);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
         }
@@ -153,8 +182,8 @@ void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
 {
     int ret = 0;
 
-    if (f->ops->after_ram_iterate) {
-        ret = f->ops->after_ram_iterate(f, f->opaque, flags, NULL);
+    if (f->hooks && f->hooks->after_ram_iterate) {
+        ret = f->hooks->after_ram_iterate(f, f->opaque, flags, NULL);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
         }
@@ -165,8 +194,8 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
 {
     int ret = -EINVAL;
 
-    if (f->ops->hook_ram_load) {
-        ret = f->ops->hook_ram_load(f, f->opaque, flags, data);
+    if (f->hooks && f->hooks->hook_ram_load) {
+        ret = f->hooks->hook_ram_load(f, f->opaque, flags, data);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
         }
@@ -185,9 +214,9 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
                              ram_addr_t offset, size_t size,
                              uint64_t *bytes_sent)
 {
-    if (f->ops->save_page) {
-        int ret = f->ops->save_page(f, f->opaque, block_offset,
-                                    offset, size, bytes_sent);
+    if (f->hooks && f->hooks->save_page) {
+        int ret = f->hooks->save_page(f, f->opaque, block_offset,
+                                      offset, size, bytes_sent);
 
         if (ret != RAM_SAVE_CONTROL_DELAYED) {
             if (bytes_sent && *bytes_sent > 0) {
@@ -239,14 +268,6 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
     return len;
 }
 
-int qemu_get_fd(QEMUFile *f)
-{
-    if (f->ops->get_fd) {
-        return f->ops->get_fd(f->opaque);
-    }
-    return -1;
-}
-
 void qemu_update_position(QEMUFile *f, size_t size)
 {
     f->pos += size;
@@ -301,11 +322,6 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size)
 
 void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size)
 {
-    if (!f->ops->writev_buffer) {
-        qemu_put_buffer(f, buf, size);
-        return;
-    }
-
     if (f->last_error) {
         return;
     }
@@ -329,9 +345,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size)
         }
         memcpy(f->buf + f->buf_index, buf, l);
         f->bytes_xfer += l;
-        if (f->ops->writev_buffer) {
-            add_to_iovec(f, f->buf + f->buf_index, l);
-        }
+        add_to_iovec(f, f->buf + f->buf_index, l);
         f->buf_index += l;
         if (f->buf_index == IO_BUF_SIZE) {
             qemu_fflush(f);
@@ -352,9 +366,7 @@ void qemu_put_byte(QEMUFile *f, int v)
 
     f->buf[f->buf_index] = v;
     f->bytes_xfer++;
-    if (f->ops->writev_buffer) {
-        add_to_iovec(f, f->buf + f->buf_index, 1);
-    }
+    add_to_iovec(f, f->buf + f->buf_index, 1);
     f->buf_index++;
     if (f->buf_index == IO_BUF_SIZE) {
         qemu_fflush(f);
@@ -518,12 +530,8 @@ int64_t qemu_ftell_fast(QEMUFile *f)
     int64_t ret = f->pos;
     int i;
 
-    if (f->ops->writev_buffer) {
-        for (i = 0; i < f->iovcnt; i++) {
-            ret += f->iov[i].iov_len;
-        }
-    } else {
-        ret += f->buf_index;
+    for (i = 0; i < f->iovcnt; i++) {
+        ret += f->iov[i].iov_len;
     }
 
     return ret;
@@ -670,9 +678,7 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
  */
 void qemu_file_set_blocking(QEMUFile *f, bool block)
 {
-    if (block) {
-        qemu_set_block(qemu_get_fd(f));
-    } else {
-        qemu_set_nonblock(qemu_get_fd(f));
+    if (f->ops->set_blocking) {
+        f->ops->set_blocking(f->opaque, block);
     }
 }
diff --git a/migration/ram.c b/migration/ram.c
index 54e215128c..844ea4694f 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -429,10 +429,8 @@ static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
 static void mig_throttle_guest_down(void)
 {
     MigrationState *s = migrate_get_current();
-    uint64_t pct_initial =
-            s->parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL];
-    uint64_t pct_icrement =
-            s->parameters[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT];
+    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
+    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
 
     /* We have not started throttling yet. Let's start it. */
     if (!cpu_throttle_active()) {
diff --git a/migration/rdma.c b/migration/rdma.c
index f6a9992b3e..51bafc702b 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2,10 +2,12 @@
  * RDMA protocol and interfaces
  *
  * Copyright IBM, Corp. 2010-2013
+ * Copyright Red Hat, Inc. 2015-2016
  *
  * Authors:
  *  Michael R. Hines <mrhines@us.ibm.com>
  *  Jiuxing Liu <jl@us.ibm.com>
+ *  Daniel P. Berrange <berrange@redhat.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or
  * later.  See the COPYING file in the top-level directory.
@@ -374,14 +376,20 @@ typedef struct RDMAContext {
     GHashTable *blockmap;
 } RDMAContext;
 
-/*
- * Interface to the rest of the migration call stack.
- */
-typedef struct QEMUFileRDMA {
+#define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
+#define QIO_CHANNEL_RDMA(obj)                                     \
+    OBJECT_CHECK(QIOChannelRDMA, (obj), TYPE_QIO_CHANNEL_RDMA)
+
+typedef struct QIOChannelRDMA QIOChannelRDMA;
+
+
+struct QIOChannelRDMA {
+    QIOChannel parent;
     RDMAContext *rdma;
+    QEMUFile *file;
     size_t len;
-    void *file;
-} QEMUFileRDMA;
+    bool blocking; /* XXX we don't actually honour this yet */
+};
 
 /*
  * Main structure for IB Send/Recv control messages.
@@ -2518,15 +2526,19 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
  * SEND messages for control only.
  * VM's ram is handled with regular RDMA messages.
  */
-static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
-                                    int64_t pos, size_t size)
-{
-    QEMUFileRDMA *r = opaque;
-    QEMUFile *f = r->file;
-    RDMAContext *rdma = r->rdma;
-    size_t remaining = size;
-    uint8_t * data = (void *) buf;
+static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
+                                       const struct iovec *iov,
+                                       size_t niov,
+                                       int *fds,
+                                       size_t nfds,
+                                       Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    QEMUFile *f = rioc->file;
+    RDMAContext *rdma = rioc->rdma;
     int ret;
+    ssize_t done = 0;
+    size_t i;
 
     CHECK_ERROR_STATE();
 
@@ -2540,27 +2552,31 @@ static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
         return ret;
     }
 
-    while (remaining) {
-        RDMAControlHeader head;
+    for (i = 0; i < niov; i++) {
+        size_t remaining = iov[i].iov_len;
+        uint8_t * data = (void *)iov[i].iov_base;
+        while (remaining) {
+            RDMAControlHeader head;
 
-        r->len = MIN(remaining, RDMA_SEND_INCREMENT);
-        remaining -= r->len;
+            rioc->len = MIN(remaining, RDMA_SEND_INCREMENT);
+            remaining -= rioc->len;
 
-        /* Guaranteed to fit due to RDMA_SEND_INCREMENT MIN above */
-        head.len = (uint32_t)r->len;
-        head.type = RDMA_CONTROL_QEMU_FILE;
+            head.len = rioc->len;
+            head.type = RDMA_CONTROL_QEMU_FILE;
 
-        ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
+            ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
 
-        if (ret < 0) {
-            rdma->error_state = ret;
-            return ret;
-        }
+            if (ret < 0) {
+                rdma->error_state = ret;
+                return ret;
+            }
 
-        data += r->len;
+            data += rioc->len;
+            done += rioc->len;
+        }
     }
 
-    return size;
+    return done;
 }
 
 static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
@@ -2585,41 +2601,74 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
  * RDMA links don't use bytestreams, so we have to
  * return bytes to QEMUFile opportunistically.
  */
-static ssize_t qemu_rdma_get_buffer(void *opaque, uint8_t *buf,
-                                    int64_t pos, size_t size)
-{
-    QEMUFileRDMA *r = opaque;
-    RDMAContext *rdma = r->rdma;
+static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
+                                      const struct iovec *iov,
+                                      size_t niov,
+                                      int **fds,
+                                      size_t *nfds,
+                                      Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    RDMAContext *rdma = rioc->rdma;
     RDMAControlHeader head;
     int ret = 0;
+    ssize_t i;
+    size_t done = 0;
 
     CHECK_ERROR_STATE();
 
-    /*
-     * First, we hold on to the last SEND message we
-     * were given and dish out the bytes until we run
-     * out of bytes.
-     */
-    r->len = qemu_rdma_fill(r->rdma, buf, size, 0);
-    if (r->len) {
-        return r->len;
-    }
+    for (i = 0; i < niov; i++) {
+        size_t want = iov[i].iov_len;
+        uint8_t *data = (void *)iov[i].iov_base;
 
-    /*
-     * Once we run out, we block and wait for another
-     * SEND message to arrive.
-     */
-    ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
+        /*
+         * First, we hold on to the last SEND message we
+         * were given and dish out the bytes until we run
+         * out of bytes.
+         */
+        ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
+        done += ret;
+        want -= ret;
+        /* Got what we needed, so go to next iovec */
+        if (want == 0) {
+            continue;
+        }
 
-    if (ret < 0) {
-        rdma->error_state = ret;
-        return ret;
-    }
+        /* If we got any data so far, then don't wait
+         * for more, just return what we have */
+        if (done > 0) {
+            break;
+        }
 
-    /*
-     * SEND was received with new bytes, now try again.
-     */
-    return qemu_rdma_fill(r->rdma, buf, size, 0);
+
+        /* We've got nothing at all, so lets wait for
+         * more to arrive
+         */
+        ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
+
+        if (ret < 0) {
+            rdma->error_state = ret;
+            return ret;
+        }
+
+        /*
+         * SEND was received with new bytes, now try again.
+         */
+        ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
+        done += ret;
+        want -= ret;
+
+        /* Still didn't get enough, so lets just return */
+        if (want) {
+            if (done == 0) {
+                return QIO_CHANNEL_ERR_BLOCK;
+            } else {
+                break;
+            }
+        }
+    }
+    rioc->len = done;
+    return rioc->len;
 }
 
 /*
@@ -2646,15 +2695,122 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma)
     return 0;
 }
 
-static int qemu_rdma_close(void *opaque)
+
+static int qio_channel_rdma_set_blocking(QIOChannel *ioc,
+                                         bool blocking,
+                                         Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    /* XXX we should make readv/writev actually honour this :-) */
+    rioc->blocking = blocking;
+    return 0;
+}
+
+
+typedef struct QIOChannelRDMASource QIOChannelRDMASource;
+struct QIOChannelRDMASource {
+    GSource parent;
+    QIOChannelRDMA *rioc;
+    GIOCondition condition;
+};
+
+static gboolean
+qio_channel_rdma_source_prepare(GSource *source,
+                                gint *timeout)
+{
+    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
+    RDMAContext *rdma = rsource->rioc->rdma;
+    GIOCondition cond = 0;
+    *timeout = -1;
+
+    if (rdma->wr_data[0].control_len) {
+        cond |= G_IO_IN;
+    }
+    cond |= G_IO_OUT;
+
+    return cond & rsource->condition;
+}
+
+static gboolean
+qio_channel_rdma_source_check(GSource *source)
+{
+    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
+    RDMAContext *rdma = rsource->rioc->rdma;
+    GIOCondition cond = 0;
+
+    if (rdma->wr_data[0].control_len) {
+        cond |= G_IO_IN;
+    }
+    cond |= G_IO_OUT;
+
+    return cond & rsource->condition;
+}
+
+static gboolean
+qio_channel_rdma_source_dispatch(GSource *source,
+                                 GSourceFunc callback,
+                                 gpointer user_data)
+{
+    QIOChannelFunc func = (QIOChannelFunc)callback;
+    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
+    RDMAContext *rdma = rsource->rioc->rdma;
+    GIOCondition cond = 0;
+
+    if (rdma->wr_data[0].control_len) {
+        cond |= G_IO_IN;
+    }
+    cond |= G_IO_OUT;
+
+    return (*func)(QIO_CHANNEL(rsource->rioc),
+                   (cond & rsource->condition),
+                   user_data);
+}
+
+static void
+qio_channel_rdma_source_finalize(GSource *source)
+{
+    QIOChannelRDMASource *ssource = (QIOChannelRDMASource *)source;
+
+    object_unref(OBJECT(ssource->rioc));
+}
+
+GSourceFuncs qio_channel_rdma_source_funcs = {
+    qio_channel_rdma_source_prepare,
+    qio_channel_rdma_source_check,
+    qio_channel_rdma_source_dispatch,
+    qio_channel_rdma_source_finalize
+};
+
+static GSource *qio_channel_rdma_create_watch(QIOChannel *ioc,
+                                              GIOCondition condition)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    QIOChannelRDMASource *ssource;
+    GSource *source;
+
+    source = g_source_new(&qio_channel_rdma_source_funcs,
+                          sizeof(QIOChannelRDMASource));
+    ssource = (QIOChannelRDMASource *)source;
+
+    ssource->rioc = rioc;
+    object_ref(OBJECT(rioc));
+
+    ssource->condition = condition;
+
+    return source;
+}
+
+
+static int qio_channel_rdma_close(QIOChannel *ioc,
+                                  Error **errp)
 {
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
     trace_qemu_rdma_close();
-    QEMUFileRDMA *r = opaque;
-    if (r->rdma) {
-        qemu_rdma_cleanup(r->rdma);
-        g_free(r->rdma);
+    if (rioc->rdma) {
+        qemu_rdma_cleanup(rioc->rdma);
+        g_free(rioc->rdma);
+        rioc->rdma = NULL;
     }
-    g_free(r);
     return 0;
 }
 
@@ -2696,8 +2852,8 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
                                   ram_addr_t block_offset, ram_addr_t offset,
                                   size_t size, uint64_t *bytes_sent)
 {
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
     int ret;
 
     CHECK_ERROR_STATE();
@@ -2951,8 +3107,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
                              };
     RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
                                  .repeat = 1 };
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
     RDMAControlHeader head;
     RDMARegister *reg, *registers;
@@ -3207,9 +3363,10 @@ out:
  * We've already built our local RAMBlock list, but not yet sent the list to
  * the source.
  */
-static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name)
+static int
+rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
 {
-    RDMAContext *rdma = rfile->rdma;
+    RDMAContext *rdma = rioc->rdma;
     int curr;
     int found = -1;
 
@@ -3251,8 +3408,8 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
 static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
                                         uint64_t flags, void *data)
 {
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
 
     CHECK_ERROR_STATE();
 
@@ -3271,8 +3428,8 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
                                        uint64_t flags, void *data)
 {
     Error *local_err = NULL, **errp = &local_err;
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
     RDMAControlHeader head = { .len = 0, .repeat = 1 };
     int ret = 0;
 
@@ -3368,47 +3525,74 @@ err:
     return ret;
 }
 
-static int qemu_rdma_get_fd(void *opaque)
-{
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
-
-    return rdma->comp_channel->fd;
-}
-
-static const QEMUFileOps rdma_read_ops = {
-    .get_buffer    = qemu_rdma_get_buffer,
-    .get_fd        = qemu_rdma_get_fd,
-    .close         = qemu_rdma_close,
+static const QEMUFileHooks rdma_read_hooks = {
     .hook_ram_load = rdma_load_hook,
 };
 
-static const QEMUFileOps rdma_write_ops = {
-    .put_buffer         = qemu_rdma_put_buffer,
-    .close              = qemu_rdma_close,
+static const QEMUFileHooks rdma_write_hooks = {
     .before_ram_iterate = qemu_rdma_registration_start,
     .after_ram_iterate  = qemu_rdma_registration_stop,
     .save_page          = qemu_rdma_save_page,
 };
 
-static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
+
+static void qio_channel_rdma_finalize(Object *obj)
 {
-    QEMUFileRDMA *r;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(obj);
+    if (rioc->rdma) {
+        qemu_rdma_cleanup(rioc->rdma);
+        g_free(rioc->rdma);
+        rioc->rdma = NULL;
+    }
+}
+
+static void qio_channel_rdma_class_init(ObjectClass *klass,
+                                        void *class_data G_GNUC_UNUSED)
+{
+    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+    ioc_klass->io_writev = qio_channel_rdma_writev;
+    ioc_klass->io_readv = qio_channel_rdma_readv;
+    ioc_klass->io_set_blocking = qio_channel_rdma_set_blocking;
+    ioc_klass->io_close = qio_channel_rdma_close;
+    ioc_klass->io_create_watch = qio_channel_rdma_create_watch;
+}
+
+static const TypeInfo qio_channel_rdma_info = {
+    .parent = TYPE_QIO_CHANNEL,
+    .name = TYPE_QIO_CHANNEL_RDMA,
+    .instance_size = sizeof(QIOChannelRDMA),
+    .instance_finalize = qio_channel_rdma_finalize,
+    .class_init = qio_channel_rdma_class_init,
+};
+
+static void qio_channel_rdma_register_types(void)
+{
+    type_register_static(&qio_channel_rdma_info);
+}
+
+type_init(qio_channel_rdma_register_types);
+
+static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
+{
+    QIOChannelRDMA *rioc;
 
     if (qemu_file_mode_is_not_valid(mode)) {
         return NULL;
     }
 
-    r = g_new0(QEMUFileRDMA, 1);
-    r->rdma = rdma;
+    rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
+    rioc->rdma = rdma;
 
     if (mode[0] == 'w') {
-        r->file = qemu_fopen_ops(r, &rdma_write_ops);
+        rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
+        qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
     } else {
-        r->file = qemu_fopen_ops(r, &rdma_read_ops);
+        rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
+        qemu_file_set_hooks(rioc->file, &rdma_read_hooks);
     }
 
-    return r->file;
+    return rioc->file;
 }
 
 static void rdma_accept_incoming_migration(void *opaque)
@@ -3481,16 +3665,14 @@ void rdma_start_outgoing_migration(void *opaque,
                             const char *host_port, Error **errp)
 {
     MigrationState *s = opaque;
-    Error *local_err = NULL, **temp = &local_err;
-    RDMAContext *rdma = qemu_rdma_data_init(host_port, &local_err);
+    RDMAContext *rdma = qemu_rdma_data_init(host_port, errp);
     int ret = 0;
 
     if (rdma == NULL) {
-        ERROR(temp, "Failed to initialize RDMA data structures! %d", ret);
         goto err;
     }
 
-    ret = qemu_rdma_source_init(rdma, &local_err,
+    ret = qemu_rdma_source_init(rdma, errp,
         s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
 
     if (ret) {
@@ -3498,7 +3680,7 @@ void rdma_start_outgoing_migration(void *opaque,
     }
 
     trace_rdma_start_outgoing_migration_after_rdma_source_init();
-    ret = qemu_rdma_connect(rdma, &local_err);
+    ret = qemu_rdma_connect(rdma, errp);
 
     if (ret) {
         goto err;
@@ -3510,7 +3692,5 @@ void rdma_start_outgoing_migration(void *opaque,
     migrate_fd_connect(s);
     return;
 err:
-    error_propagate(errp, local_err);
     g_free(rdma);
-    migrate_fd_error(s);
 }
diff --git a/migration/savevm.c b/migration/savevm.c
index 65ce0c61a3..6c21231131 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -51,6 +51,8 @@
 #include "block/snapshot.h"
 #include "block/qapi.h"
 #include "qemu/cutils.h"
+#include "io/channel-buffer.h"
+#include "io/channel-file.h"
 
 #ifndef ETH_P_RARP
 #define ETH_P_RARP 0x8035
@@ -158,13 +160,6 @@ static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
     return qiov.size;
 }
 
-static ssize_t block_put_buffer(void *opaque, const uint8_t *buf,
-                                int64_t pos, size_t size)
-{
-    bdrv_save_vmstate(opaque, buf, pos, size);
-    return size;
-}
-
 static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
                                 size_t size)
 {
@@ -182,7 +177,6 @@ static const QEMUFileOps bdrv_read_ops = {
 };
 
 static const QEMUFileOps bdrv_write_ops = {
-    .put_buffer     = block_put_buffer,
     .writev_buffer  = block_writev_buffer,
     .close          = bdrv_fclose
 };
@@ -760,10 +754,8 @@ void qemu_savevm_send_open_return_path(QEMUFile *f)
  *    0 on success
  *    -ve on error
  */
-int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
+int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len)
 {
-    size_t cur_iov;
-    size_t len = qsb_get_length(qsb);
     uint32_t tmp;
 
     if (len > MAX_VM_CMD_PACKAGED_SIZE) {
@@ -777,18 +769,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const QEMUSizedBuffer *qsb)
     trace_qemu_savevm_send_packaged();
     qemu_savevm_command_send(f, MIG_CMD_PACKAGED, 4, (uint8_t *)&tmp);
 
-    /* all the data follows (concatinating the iov's) */
-    for (cur_iov = 0; cur_iov < qsb->n_iov; cur_iov++) {
-        /* The iov entries are partially filled */
-        size_t towrite = MIN(qsb->iov[cur_iov].iov_len, len);
-        len -= towrite;
-
-        if (!towrite) {
-            break;
-        }
-
-        qemu_put_buffer(f, qsb->iov[cur_iov].iov_base, towrite);
-    }
+    qemu_put_buffer(f, buf, len);
 
     return 0;
 }
@@ -1578,39 +1559,36 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
 {
     int ret;
-    uint8_t *buffer;
-    uint32_t length;
-    QEMUSizedBuffer *qsb;
+    size_t length;
+    QIOChannelBuffer *bioc;
 
     length = qemu_get_be32(mis->from_src_file);
     trace_loadvm_handle_cmd_packaged(length);
 
     if (length > MAX_VM_CMD_PACKAGED_SIZE) {
-        error_report("Unreasonably large packaged state: %u", length);
+        error_report("Unreasonably large packaged state: %zu", length);
         return -1;
     }
-    buffer = g_malloc0(length);
-    ret = qemu_get_buffer(mis->from_src_file, buffer, (int)length);
+
+    bioc = qio_channel_buffer_new(length);
+    ret = qemu_get_buffer(mis->from_src_file,
+                          bioc->data,
+                          length);
     if (ret != length) {
-        g_free(buffer);
-        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%d",
+        object_unref(OBJECT(bioc));
+        error_report("CMD_PACKAGED: Buffer receive fail ret=%d length=%zu",
                      ret, length);
         return (ret < 0) ? ret : -EAGAIN;
     }
+    bioc->usage += length;
     trace_loadvm_handle_cmd_packaged_received(ret);
 
-    /* Setup a dummy QEMUFile that actually reads from the buffer */
-    qsb = qsb_create(buffer, length);
-    g_free(buffer); /* Because qsb_create copies */
-    if (!qsb) {
-        error_report("Unable to create qsb");
-    }
-    QEMUFile *packf = qemu_bufopen("r", qsb);
+    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
 
     ret = qemu_loadvm_state_main(packf, mis);
     trace_loadvm_handle_cmd_packaged_main(ret);
     qemu_fclose(packf);
-    qsb_free(qsb);
+    object_unref(OBJECT(bioc));
 
     return ret;
 }
@@ -2061,6 +2039,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 void qmp_xen_save_devices_state(const char *filename, Error **errp)
 {
     QEMUFile *f;
+    QIOChannelFile *ioc;
     int saved_vm_running;
     int ret;
 
@@ -2068,11 +2047,11 @@ void qmp_xen_save_devices_state(const char *filename, Error **errp)
     vm_stop(RUN_STATE_SAVE_VM);
     global_state_store_running();
 
-    f = qemu_fopen(filename, "wb");
-    if (!f) {
-        error_setg_file_open(errp, errno, filename);
+    ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp);
+    if (!ioc) {
         goto the_end;
     }
+    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
     ret = qemu_save_device_state(f);
     qemu_fclose(f);
     if (ret < 0) {
diff --git a/migration/socket.c b/migration/socket.c
new file mode 100644
index 0000000000..977a8d3c1d
--- /dev/null
+++ b/migration/socket.c
@@ -0,0 +1,183 @@
+/*
+ * QEMU live migration via Unix Domain Sockets
+ *
+ * Copyright Red Hat, Inc. 2009-2016
+ *
+ * Authors:
+ *  Chris Lalancette <clalance@redhat.com>
+ *  Daniel P. Berrange <berrange@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "io/channel-socket.h"
+#include "trace.h"
+
+
+static SocketAddress *tcp_build_address(const char *host_port, Error **errp)
+{
+    InetSocketAddress *iaddr = inet_parse(host_port, errp);
+    SocketAddress *saddr;
+
+    if (!iaddr) {
+        return NULL;
+    }
+
+    saddr = g_new0(SocketAddress, 1);
+    saddr->type = SOCKET_ADDRESS_KIND_INET;
+    saddr->u.inet.data = iaddr;
+
+    return saddr;
+}
+
+
+static SocketAddress *unix_build_address(const char *path)
+{
+    SocketAddress *saddr;
+
+    saddr = g_new0(SocketAddress, 1);
+    saddr->type = SOCKET_ADDRESS_KIND_UNIX;
+    saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+    saddr->u.q_unix.data->path = g_strdup(path);
+
+    return saddr;
+}
+
+
+struct SocketConnectData {
+    MigrationState *s;
+    char *hostname;
+};
+
+static void socket_connect_data_free(void *opaque)
+{
+    struct SocketConnectData *data = opaque;
+    if (!data) {
+        return;
+    }
+    g_free(data->hostname);
+    g_free(data);
+}
+
+static void socket_outgoing_migration(Object *src,
+                                      Error *err,
+                                      gpointer opaque)
+{
+    struct SocketConnectData *data = opaque;
+    QIOChannel *sioc = QIO_CHANNEL(src);
+
+    if (err) {
+        trace_migration_socket_outgoing_error(error_get_pretty(err));
+        data->s->to_dst_file = NULL;
+        migrate_fd_error(data->s, err);
+    } else {
+        trace_migration_socket_outgoing_connected(data->hostname);
+        migration_set_outgoing_channel(data->s, sioc, data->hostname);
+    }
+    object_unref(src);
+}
+
+static void socket_start_outgoing_migration(MigrationState *s,
+                                            SocketAddress *saddr,
+                                            Error **errp)
+{
+    QIOChannelSocket *sioc = qio_channel_socket_new();
+    struct SocketConnectData *data = g_new0(struct SocketConnectData, 1);
+    data->s = s;
+    if (saddr->type == SOCKET_ADDRESS_KIND_INET) {
+        data->hostname = g_strdup(saddr->u.inet.data->host);
+    }
+    qio_channel_socket_connect_async(sioc,
+                                     saddr,
+                                     socket_outgoing_migration,
+                                     data,
+                                     socket_connect_data_free);
+    qapi_free_SocketAddress(saddr);
+}
+
+void tcp_start_outgoing_migration(MigrationState *s,
+                                  const char *host_port,
+                                  Error **errp)
+{
+    SocketAddress *saddr = tcp_build_address(host_port, errp);
+    socket_start_outgoing_migration(s, saddr, errp);
+}
+
+void unix_start_outgoing_migration(MigrationState *s,
+                                   const char *path,
+                                   Error **errp)
+{
+    SocketAddress *saddr = unix_build_address(path);
+    socket_start_outgoing_migration(s, saddr, errp);
+}
+
+
+static gboolean socket_accept_incoming_migration(QIOChannel *ioc,
+                                                 GIOCondition condition,
+                                                 gpointer opaque)
+{
+    QIOChannelSocket *sioc;
+    Error *err = NULL;
+
+    sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
+                                     &err);
+    if (!sioc) {
+        error_report("could not accept migration connection (%s)",
+                     error_get_pretty(err));
+        goto out;
+    }
+
+    trace_migration_socket_incoming_accepted();
+
+    migration_set_incoming_channel(migrate_get_current(),
+                                   QIO_CHANNEL(sioc));
+    object_unref(OBJECT(sioc));
+
+out:
+    /* Close listening socket as its no longer needed */
+    qio_channel_close(ioc, NULL);
+    return FALSE; /* unregister */
+}
+
+
+static void socket_start_incoming_migration(SocketAddress *saddr,
+                                            Error **errp)
+{
+    QIOChannelSocket *listen_ioc = qio_channel_socket_new();
+
+    if (qio_channel_socket_listen_sync(listen_ioc, saddr, errp) < 0) {
+        object_unref(OBJECT(listen_ioc));
+        qapi_free_SocketAddress(saddr);
+        return;
+    }
+
+    qio_channel_add_watch(QIO_CHANNEL(listen_ioc),
+                          G_IO_IN,
+                          socket_accept_incoming_migration,
+                          listen_ioc,
+                          (GDestroyNotify)object_unref);
+    qapi_free_SocketAddress(saddr);
+}
+
+void tcp_start_incoming_migration(const char *host_port, Error **errp)
+{
+    SocketAddress *saddr = tcp_build_address(host_port, errp);
+    socket_start_incoming_migration(saddr, errp);
+}
+
+void unix_start_incoming_migration(const char *path, Error **errp)
+{
+    SocketAddress *saddr = unix_build_address(path);
+    socket_start_incoming_migration(saddr, errp);
+}
diff --git a/migration/tcp.c b/migration/tcp.c
deleted file mode 100644
index e1fa7f8f18..0000000000
--- a/migration/tcp.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * QEMU live migration
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "qemu/sockets.h"
-#include "migration/migration.h"
-#include "migration/qemu-file.h"
-#include "block/block.h"
-#include "qemu/main-loop.h"
-
-//#define DEBUG_MIGRATION_TCP
-
-#ifdef DEBUG_MIGRATION_TCP
-#define DPRINTF(fmt, ...) \
-    do { printf("migration-tcp: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
-static void tcp_wait_for_connect(int fd, Error *err, void *opaque)
-{
-    MigrationState *s = opaque;
-
-    if (fd < 0) {
-        DPRINTF("migrate connect error: %s\n", error_get_pretty(err));
-        s->to_dst_file = NULL;
-        migrate_fd_error(s);
-    } else {
-        DPRINTF("migrate connect success\n");
-        s->to_dst_file = qemu_fopen_socket(fd, "wb");
-        migrate_fd_connect(s);
-    }
-}
-
-void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp)
-{
-    inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, errp);
-}
-
-static void tcp_accept_incoming_migration(void *opaque)
-{
-    struct sockaddr_in addr;
-    socklen_t addrlen = sizeof(addr);
-    int s = (intptr_t)opaque;
-    QEMUFile *f;
-    int c;
-
-    do {
-        c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen);
-    } while (c < 0 && errno == EINTR);
-    qemu_set_fd_handler(s, NULL, NULL, NULL);
-    closesocket(s);
-
-    DPRINTF("accepted migration\n");
-
-    if (c < 0) {
-        error_report("could not accept migration connection (%s)",
-                     strerror(errno));
-        return;
-    }
-
-    f = qemu_fopen_socket(c, "rb");
-    if (f == NULL) {
-        error_report("could not qemu_fopen socket");
-        goto out;
-    }
-
-    process_incoming_migration(f);
-    return;
-
-out:
-    closesocket(c);
-}
-
-void tcp_start_incoming_migration(const char *host_port, Error **errp)
-{
-    int s;
-
-    s = inet_listen(host_port, NULL, 256, SOCK_STREAM, 0, errp);
-    if (s < 0) {
-        return;
-    }
-
-    qemu_set_fd_handler(s, tcp_accept_incoming_migration, NULL,
-                        (void *)(intptr_t)s);
-}
diff --git a/migration/tls.c b/migration/tls.c
new file mode 100644
index 0000000000..75f959ff9c
--- /dev/null
+++ b/migration/tls.c
@@ -0,0 +1,161 @@
+/*
+ * QEMU migration TLS support
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "migration/migration.h"
+#include "io/channel-tls.h"
+#include "crypto/tlscreds.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+static QCryptoTLSCreds *
+migration_tls_get_creds(MigrationState *s,
+                        QCryptoTLSCredsEndpoint endpoint,
+                        Error **errp)
+{
+    Object *creds;
+    QCryptoTLSCreds *ret;
+
+    creds = object_resolve_path_component(
+        object_get_objects_root(), s->parameters.tls_creds);
+    if (!creds) {
+        error_setg(errp, "No TLS credentials with id '%s'",
+                   s->parameters.tls_creds);
+        return NULL;
+    }
+    ret = (QCryptoTLSCreds *)object_dynamic_cast(
+        creds, TYPE_QCRYPTO_TLS_CREDS);
+    if (!ret) {
+        error_setg(errp, "Object with id '%s' is not TLS credentials",
+                   s->parameters.tls_creds);
+        return NULL;
+    }
+    if (ret->endpoint != endpoint) {
+        error_setg(errp,
+                   "Expected TLS credentials for a %s endpoint",
+                   endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT ?
+                   "client" : "server");
+        return NULL;
+    }
+
+    object_ref(OBJECT(ret));
+    return ret;
+}
+
+
+static void migration_tls_incoming_handshake(Object *src,
+                                             Error *err,
+                                             gpointer opaque)
+{
+    QIOChannel *ioc = QIO_CHANNEL(src);
+
+    if (err) {
+        trace_migration_tls_incoming_handshake_error(error_get_pretty(err));
+        error_report("%s", error_get_pretty(err));
+    } else {
+        trace_migration_tls_incoming_handshake_complete();
+        migration_set_incoming_channel(migrate_get_current(), ioc);
+    }
+    object_unref(OBJECT(ioc));
+}
+
+void migration_tls_set_incoming_channel(MigrationState *s,
+                                        QIOChannel *ioc,
+                                        Error **errp)
+{
+    QCryptoTLSCreds *creds;
+    QIOChannelTLS *tioc;
+
+    creds = migration_tls_get_creds(
+        s, QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, errp);
+    if (!creds) {
+        return;
+    }
+
+    tioc = qio_channel_tls_new_server(
+        ioc, creds,
+        NULL, /* XXX pass ACL name */
+        errp);
+    if (!tioc) {
+        return;
+    }
+
+    trace_migration_tls_incoming_handshake_start();
+    qio_channel_tls_handshake(tioc,
+                              migration_tls_incoming_handshake,
+                              NULL,
+                              NULL);
+}
+
+
+static void migration_tls_outgoing_handshake(Object *src,
+                                             Error *err,
+                                             gpointer opaque)
+{
+    MigrationState *s = opaque;
+    QIOChannel *ioc = QIO_CHANNEL(src);
+
+    if (err) {
+        trace_migration_tls_outgoing_handshake_error(error_get_pretty(err));
+        s->to_dst_file = NULL;
+        migrate_fd_error(s, err);
+    } else {
+        trace_migration_tls_outgoing_handshake_complete();
+        migration_set_outgoing_channel(s, ioc, NULL);
+    }
+    object_unref(OBJECT(ioc));
+}
+
+
+void migration_tls_set_outgoing_channel(MigrationState *s,
+                                        QIOChannel *ioc,
+                                        const char *hostname,
+                                        Error **errp)
+{
+    QCryptoTLSCreds *creds;
+    QIOChannelTLS *tioc;
+
+    creds = migration_tls_get_creds(
+        s, QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, errp);
+    if (!creds) {
+        return;
+    }
+
+    if (s->parameters.tls_hostname) {
+        hostname = s->parameters.tls_hostname;
+    }
+    if (!hostname) {
+        error_setg(errp, "No hostname available for TLS");
+        return;
+    }
+
+    tioc = qio_channel_tls_new_client(
+        ioc, creds, hostname, errp);
+    if (!tioc) {
+        return;
+    }
+
+    trace_migration_tls_outgoing_handshake_start(hostname);
+    qio_channel_tls_handshake(tioc,
+                              migration_tls_outgoing_handshake,
+                              s,
+                              NULL);
+}
diff --git a/migration/unix.c b/migration/unix.c
deleted file mode 100644
index d9aac36b9a..0000000000
--- a/migration/unix.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * QEMU live migration via Unix Domain Sockets
- *
- * Copyright Red Hat, Inc. 2009
- *
- * Authors:
- *  Chris Lalancette <clalance@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "qemu/sockets.h"
-#include "qemu/main-loop.h"
-#include "migration/migration.h"
-#include "migration/qemu-file.h"
-#include "block/block.h"
-
-//#define DEBUG_MIGRATION_UNIX
-
-#ifdef DEBUG_MIGRATION_UNIX
-#define DPRINTF(fmt, ...) \
-    do { printf("migration-unix: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
-static void unix_wait_for_connect(int fd, Error *err, void *opaque)
-{
-    MigrationState *s = opaque;
-
-    if (fd < 0) {
-        DPRINTF("migrate connect error: %s\n", error_get_pretty(err));
-        s->to_dst_file = NULL;
-        migrate_fd_error(s);
-    } else {
-        DPRINTF("migrate connect success\n");
-        s->to_dst_file = qemu_fopen_socket(fd, "wb");
-        migrate_fd_connect(s);
-    }
-}
-
-void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp)
-{
-    unix_nonblocking_connect(path, unix_wait_for_connect, s, errp);
-}
-
-static void unix_accept_incoming_migration(void *opaque)
-{
-    struct sockaddr_un addr;
-    socklen_t addrlen = sizeof(addr);
-    int s = (intptr_t)opaque;
-    QEMUFile *f;
-    int c, err;
-
-    do {
-        c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen);
-        err = errno;
-    } while (c < 0 && err == EINTR);
-    qemu_set_fd_handler(s, NULL, NULL, NULL);
-    close(s);
-
-    DPRINTF("accepted migration\n");
-
-    if (c < 0) {
-        error_report("could not accept migration connection (%s)",
-                     strerror(err));
-        return;
-    }
-
-    f = qemu_fopen_socket(c, "rb");
-    if (f == NULL) {
-        error_report("could not qemu_fopen socket");
-        goto out;
-    }
-
-    process_incoming_migration(f);
-    return;
-
-out:
-    close(c);
-}
-
-void unix_start_incoming_migration(const char *path, Error **errp)
-{
-    int s;
-
-    s = unix_listen(path, NULL, 0, errp);
-    if (s < 0) {
-        return;
-    }
-
-    qemu_set_fd_handler(s, unix_accept_incoming_migration, NULL,
-                        (void *)(intptr_t)s);
-}
diff --git a/monitor.c b/monitor.c
index 6a32b9bf59..404d594bb3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3432,12 +3432,12 @@ static void vm_completion(ReadLineState *rs, const char *str)
 {
     size_t len;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
     len = strlen(str);
     readline_set_completion_index(rs, len);
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         SnapshotInfoList *snapshots, *snapshot;
         AioContext *ctx = bdrv_get_aio_context(bs);
         bool ok = false;
diff --git a/qapi-schema.json b/qapi-schema.json
index 9a322d1836..8483bdfcce 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -484,6 +484,10 @@
 #        throttled during auto-converge. This is only present when auto-converge
 #        has started throttling guest cpus. (Since 2.7)
 #
+# @error-desc: #optional the human readable error description string, when
+#              @status is 'failed'. Clients should not attempt to parse the
+#              error strings. (Since 2.6)
+#
 # Since: 0.14.0
 ##
 { 'struct': 'MigrationInfo',
@@ -494,7 +498,8 @@
            '*expected-downtime': 'int',
            '*downtime': 'int',
            '*setup-time': 'int',
-           '*cpu-throttle-percentage': 'int'} }
+           '*cpu-throttle-percentage': 'int',
+           '*error-desc': 'str'} }
 
 ##
 # @query-migrate
@@ -612,11 +617,28 @@
 # @cpu-throttle-increment: throttle percentage increase each time
 #                          auto-converge detects that migration is not making
 #                          progress. The default value is 10. (Since 2.7)
+#
+# @tls-creds: ID of the 'tls-creds' object that provides credentials for
+#             establishing a TLS connection over the migration data channel.
+#             On the outgoing side of the migration, the credentials must
+#             be for a 'client' endpoint, while for the incoming side the
+#             credentials must be for a 'server' endpoint. Setting this
+#             will enable TLS for all migrations. The default is unset,
+#             resulting in unsecured migration at the QEMU level. (Since 2.7)
+#
+# @tls-hostname: hostname of the target host for the migration. This is
+#                required when using x509 based TLS credentials and the
+#                migration URI does not already include a hostname. For
+#                example if using fd: or exec: based migration, the
+#                hostname must be provided so that the server's x509
+#                certificate identity canbe validated. (Since 2.7)
+#
 # Since: 2.4
 ##
 { 'enum': 'MigrationParameter',
   'data': ['compress-level', 'compress-threads', 'decompress-threads',
-           'cpu-throttle-initial', 'cpu-throttle-increment'] }
+           'cpu-throttle-initial', 'cpu-throttle-increment',
+           'tls-creds', 'tls-hostname'] }
 
 #
 # @migrate-set-parameters
@@ -636,6 +658,22 @@
 # @cpu-throttle-increment: throttle percentage increase each time
 #                          auto-converge detects that migration is not making
 #                          progress. The default value is 10. (Since 2.7)
+#
+# @tls-creds: ID of the 'tls-creds' object that provides credentials for
+#             establishing a TLS connection over the migration data channel.
+#             On the outgoing side of the migration, the credentials must
+#             be for a 'client' endpoint, while for the incoming side the
+#             credentials must be for a 'server' endpoint. Setting this
+#             will enable TLS for all migrations. The default is unset,
+#             resulting in unsecured migration at the QEMU level. (Since 2.7)
+#
+# @tls-hostname: hostname of the target host for the migration. This is
+#                required when using x509 based TLS credentials and the
+#                migration URI does not already include a hostname. For
+#                example if using fd: or exec: based migration, the
+#                hostname must be provided so that the server's x509
+#                certificate identity canbe validated. (Since 2.7)
+#
 # Since: 2.4
 ##
 { 'command': 'migrate-set-parameters',
@@ -643,7 +681,9 @@
             '*compress-threads': 'int',
             '*decompress-threads': 'int',
             '*cpu-throttle-initial': 'int',
-            '*cpu-throttle-increment': 'int'} }
+            '*cpu-throttle-increment': 'int',
+            '*tls-creds': 'str',
+            '*tls-hostname': 'str'} }
 
 #
 # @MigrationParameters
@@ -662,6 +702,21 @@
 #                          auto-converge detects that migration is not making
 #                          progress. The default value is 10. (Since 2.7)
 #
+# @tls-creds: ID of the 'tls-creds' object that provides credentials for
+#             establishing a TLS connection over the migration data channel.
+#             On the outgoing side of the migration, the credentials must
+#             be for a 'client' endpoint, while for the incoming side the
+#             credentials must be for a 'server' endpoint. Setting this
+#             will enable TLS for all migrations. The default is unset,
+#             resulting in unsecured migration at the QEMU level. (Since 2.6)
+#
+# @tls-hostname: hostname of the target host for the migration. This is
+#                required when using x509 based TLS credentials and the
+#                migration URI does not already include a hostname. For
+#                example if using fd: or exec: based migration, the
+#                hostname must be provided so that the server's x509
+#                certificate identity canbe validated. (Since 2.6)
+#
 # Since: 2.4
 ##
 { 'struct': 'MigrationParameters',
@@ -669,7 +724,9 @@
             'compress-threads': 'int',
             'decompress-threads': 'int',
             'cpu-throttle-initial': 'int',
-            'cpu-throttle-increment': 'int'} }
+            'cpu-throttle-increment': 'int',
+            'tls-creds': 'str',
+            'tls-hostname': 'str'} }
 ##
 # @query-migrate-parameters
 #
diff --git a/qemu-img.c b/qemu-img.c
index 7ed8ef21cb..4b56ad36aa 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -775,7 +775,7 @@ static void common_block_job_cb(void *opaque, int ret)
 
 static void run_block_job(BlockJob *job, Error **errp)
 {
-    AioContext *aio_context = bdrv_get_aio_context(job->bs);
+    AioContext *aio_context = blk_get_aio_context(job->blk);
 
     do {
         aio_poll(aio_context, true);
@@ -1606,8 +1606,8 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
             if (s->has_zero_init) {
                 break;
             }
-            ret = blk_write_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
-                                   n << BDRV_SECTOR_BITS, 0);
+            ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
+                                    n << BDRV_SECTOR_BITS, 0);
             if (ret < 0) {
                 return ret;
             }
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index e766791ffc..09e879f872 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -451,12 +451,12 @@ typedef struct {
     bool done;
 } CoWriteZeroes;
 
-static void coroutine_fn co_write_zeroes_entry(void *opaque)
+static void coroutine_fn co_pwrite_zeroes_entry(void *opaque)
 {
     CoWriteZeroes *data = opaque;
 
-    data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count,
-                                    data->flags);
+    data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->count,
+                                     data->flags);
     data->done = true;
     if (data->ret < 0) {
         *data->total = data->ret;
@@ -466,8 +466,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque)
     *data->total = data->count;
 }
 
-static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count,
-                              int flags, int64_t *total)
+static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                               int64_t count, int flags, int64_t *total)
 {
     Coroutine *co;
     CoWriteZeroes data = {
@@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count,
         return -ERANGE;
     }
 
-    co = qemu_coroutine_create(co_write_zeroes_entry);
+    co = qemu_coroutine_create(co_pwrite_zeroes_entry);
     qemu_coroutine_enter(co, &data);
     while (!data.done) {
         aio_poll(blk_get_aio_context(blk), true);
@@ -901,7 +901,7 @@ static void write_help(void)
 " -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
 " -u, -- with -z, allow unmapping\n"
-" -z, -- write zeroes using blk_co_write_zeroes\n"
+" -z, -- write zeroes using blk_co_pwrite_zeroes\n"
 "\n");
 }
 
@@ -1033,7 +1033,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     if (bflag) {
         cnt = do_save_vmstate(blk, buf, offset, count, &total);
     } else if (zflag) {
-        cnt = do_co_write_zeroes(blk, offset, count, flags, &total);
+        cnt = do_co_pwrite_zeroes(blk, offset, count, flags, &total);
     } else if (cflag) {
         cnt = do_write_compressed(blk, buf, offset, count, &total);
     } else {
@@ -1376,7 +1376,7 @@ static void aio_write_help(void)
 " -i, -- treat request as invalid, for exercising stats\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
 " -u, -- with -z, allow unmapping\n"
-" -z, -- write zeroes using blk_aio_write_zeroes\n"
+" -z, -- write zeroes using blk_aio_pwrite_zeroes\n"
 "\n");
 }
 
@@ -1475,8 +1475,8 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         }
 
         ctx->qiov.size = count;
-        blk_aio_write_zeroes(blk, ctx->offset, count, flags, aio_write_done,
-                             ctx);
+        blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done,
+                              ctx);
     } else {
         nr_iov = argc - optind;
         ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov,
diff --git a/qmp.c b/qmp.c
index 8f8ae3a79d..3165f8726b 100644
--- a/qmp.c
+++ b/qmp.c
@@ -181,7 +181,7 @@ void qmp_cont(Error **errp)
     Error *local_err = NULL;
     BlockBackend *blk;
     BlockDriverState *bs;
-    BdrvNextIterator *it;
+    BdrvNextIterator it;
 
     /* if there is a dump in background, we should wait until the dump
      * finished */
@@ -201,8 +201,7 @@ void qmp_cont(Error **errp)
         blk_iostatus_reset(blk);
     }
 
-    it = NULL;
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         bdrv_add_key(bs, NULL, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index fc7931227d..3b2090e42e 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -163,7 +163,7 @@ static inline bool kvmppc_spapr_use_multitce(void)
 
 static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
                                             uint32_t window_size, int *fd,
-                                            bool vfio_accel)
+                                            bool need_vfio)
 {
     return NULL;
 }
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 04e6932fa0..17e24800cb 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -284,8 +284,6 @@ void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
     CPUPPCState *env = &cpu->env;
     Error *local_err = NULL;
 
-    cpu_synchronize_state(CPU(cpu));
-
     if (hpt) {
         env->external_htab = hpt;
     } else {
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 745f4de98f..f5ceae5900 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -756,27 +756,20 @@ static void gen_cmpli(DisasContext *ctx)
 /* isel (PowerPC 2.03 specification) */
 static void gen_isel(DisasContext *ctx)
 {
-    TCGLabel *l1, *l2;
     uint32_t bi = rC(ctx->opcode);
-    uint32_t mask;
-    TCGv_i32 t0;
+    uint32_t mask = 0x08 >> (bi & 0x03);
+    TCGv t0 = tcg_temp_new();
+    TCGv zr;
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
+    tcg_gen_extu_i32_tl(t0, cpu_crf[bi >> 2]);
+    tcg_gen_andi_tl(t0, t0, mask);
 
-    mask = 0x08 >> (bi & 0x03);
-    t0 = tcg_temp_new_i32();
-    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
-    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-    if (rA(ctx->opcode) == 0)
-        tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], 0);
-    else
-        tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
-    gen_set_label(l2);
-    tcg_temp_free_i32(t0);
+    zr = tcg_const_tl(0);
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t0, zr,
+                       rA(ctx->opcode) ? cpu_gpr[rA(ctx->opcode)] : zr,
+                       cpu_gpr[rB(ctx->opcode)]);
+    tcg_temp_free(zr);
+    tcg_temp_free(t0);
 }
 
 /* cmpb: PowerPC 2.05 specification */
@@ -1617,141 +1610,109 @@ static void gen_cntlzd(DisasContext *ctx)
 /* rlwimi & rlwimi. */
 static void gen_rlwimi(DisasContext *ctx)
 {
-    uint32_t mb, me, sh;
+    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
+    uint32_t sh = SH(ctx->opcode);
+    uint32_t mb = MB(ctx->opcode);
+    uint32_t me = ME(ctx->opcode);
 
-    mb = MB(ctx->opcode);
-    me = ME(ctx->opcode);
-    sh = SH(ctx->opcode);
-    if (likely(sh == (31-me) && mb <= me)) {
-        tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                           cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1);
+    if (sh == (31-me) && mb <= me) {
+        tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1);
     } else {
         target_ulong mask;
+        TCGv_i32 t0;
         TCGv t1;
-        TCGv t0 = tcg_temp_new();
-#if defined(TARGET_PPC64)
-        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
-            cpu_gpr[rS(ctx->opcode)], 32, 32);
-        tcg_gen_rotli_i64(t0, t0, sh);
-#else
-        tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
-#endif
+
 #if defined(TARGET_PPC64)
         mb += 32;
         me += 32;
 #endif
         mask = MASK(mb, me);
+
+        t0 = tcg_temp_new_i32();
         t1 = tcg_temp_new();
-        tcg_gen_andi_tl(t0, t0, mask);
-        tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask);
-        tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1);
-        tcg_temp_free(t0);
+        tcg_gen_trunc_tl_i32(t0, t_rs);
+        tcg_gen_rotli_i32(t0, t0, sh);
+        tcg_gen_extu_i32_tl(t1, t0);
+        tcg_temp_free_i32(t0);
+
+        tcg_gen_andi_tl(t1, t1, mask);
+        tcg_gen_andi_tl(t_ra, t_ra, ~mask);
+        tcg_gen_or_tl(t_ra, t_ra, t1);
         tcg_temp_free(t1);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, t_ra);
+    }
 }
 
 /* rlwinm & rlwinm. */
 static void gen_rlwinm(DisasContext *ctx)
 {
-    uint32_t mb, me, sh;
-
-    sh = SH(ctx->opcode);
-    mb = MB(ctx->opcode);
-    me = ME(ctx->opcode);
+    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
+    uint32_t sh = SH(ctx->opcode);
+    uint32_t mb = MB(ctx->opcode);
+    uint32_t me = ME(ctx->opcode);
 
-    if (likely(mb == 0 && me == (31 - sh))) {
-        if (likely(sh == 0)) {
-            tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-        } else {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]);
-            tcg_gen_shli_tl(t0, t0, sh);
-            tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
-            tcg_temp_free(t0);
-        }
-    } else if (likely(sh != 0 && me == 31 && sh == (32 - mb))) {
-        TCGv t0 = tcg_temp_new();
-        tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_shri_tl(t0, t0, mb);
-        tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
-        tcg_temp_free(t0);
-    } else if (likely(mb == 0 && me == 31)) {
-        TCGv_i32 t0 = tcg_temp_new_i32();
-        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t0, t0, sh);
-        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0);
-        tcg_temp_free_i32(t0);
+    if (mb == 0 && me == (31 - sh)) {
+        tcg_gen_shli_tl(t_ra, t_rs, sh);
+        tcg_gen_ext32u_tl(t_ra, t_ra);
+    } else if (sh != 0 && me == 31 && sh == (32 - mb)) {
+        tcg_gen_ext32u_tl(t_ra, t_rs);
+        tcg_gen_shri_tl(t_ra, t_ra, mb);
     } else {
-        TCGv t0 = tcg_temp_new();
-#if defined(TARGET_PPC64)
-        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
-            cpu_gpr[rS(ctx->opcode)], 32, 32);
-        tcg_gen_rotli_i64(t0, t0, sh);
-#else
-        tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
-#endif
 #if defined(TARGET_PPC64)
         mb += 32;
         me += 32;
 #endif
-        tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
-        tcg_temp_free(t0);
+        if (sh == 0) {
+            tcg_gen_andi_tl(t_ra, t_rs, MASK(mb, me));
+        } else {
+            TCGv_i32 t0 = tcg_temp_new_i32();
+
+            tcg_gen_trunc_tl_i32(t0, t_rs);
+            tcg_gen_rotli_i32(t0, t0, sh);
+            tcg_gen_andi_i32(t0, t0, MASK(mb, me));
+            tcg_gen_extu_i32_tl(t_ra, t0);
+            tcg_temp_free_i32(t0);
+        }
+    }
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, t_ra);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
 
 /* rlwnm & rlwnm. */
 static void gen_rlwnm(DisasContext *ctx)
 {
-    uint32_t mb, me;
-    mb = MB(ctx->opcode);
-    me = ME(ctx->opcode);
+    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
+    TCGv t_rb = cpu_gpr[rB(ctx->opcode)];
+    uint32_t mb = MB(ctx->opcode);
+    uint32_t me = ME(ctx->opcode);
+    TCGv_i32 t0, t1;
 
-    if (likely(mb == 0 && me == 31)) {
-        TCGv_i32 t0, t1;
-        t0 = tcg_temp_new_i32();
-        t1 = tcg_temp_new_i32();
-        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]);
-        tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_andi_i32(t0, t0, 0x1f);
-        tcg_gen_rotl_i32(t1, t1, t0);
-        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t1);
-        tcg_temp_free_i32(t0);
-        tcg_temp_free_i32(t1);
-    } else {
-        TCGv t0;
 #if defined(TARGET_PPC64)
-        TCGv t1;
+    mb += 32;
+    me += 32;
 #endif
 
-        t0 = tcg_temp_new();
-        tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
-#if defined(TARGET_PPC64)
-        t1 = tcg_temp_new_i64();
-        tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
-                            cpu_gpr[rS(ctx->opcode)], 32, 32);
-        tcg_gen_rotl_i64(t0, t1, t0);
-        tcg_temp_free_i64(t1);
-#else
-        tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
-#endif
-        if (unlikely(mb != 0 || me != 31)) {
-#if defined(TARGET_PPC64)
-            mb += 32;
-            me += 32;
-#endif
-            tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
-        } else {
-            tcg_gen_andi_tl(t0, t0, MASK(32, 63));
-            tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
-        }
-        tcg_temp_free(t0);
+    t0 = tcg_temp_new_i32();
+    t1 = tcg_temp_new_i32();
+    tcg_gen_trunc_tl_i32(t0, t_rb);
+    tcg_gen_trunc_tl_i32(t1, t_rs);
+    tcg_gen_andi_i32(t0, t0, 0x1f);
+    tcg_gen_rotl_i32(t1, t1, t0);
+    tcg_temp_free_i32(t0);
+
+    tcg_gen_andi_i32(t1, t1, MASK(mb, me));
+    tcg_gen_extu_i32_tl(t_ra, t1);
+    tcg_temp_free_i32(t1);
+
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, t_ra);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
 
 #if defined(TARGET_PPC64)
@@ -1786,26 +1747,24 @@ static void glue(gen_, name##3)(DisasContext *ctx)                            \
     gen_##name(ctx, 1, 1);                                                    \
 }
 
-static inline void gen_rldinm(DisasContext *ctx, uint32_t mb, uint32_t me,
-                              uint32_t sh)
+static void gen_rldinm(DisasContext *ctx, int mb, int me, int sh)
 {
-    if (likely(sh != 0 && mb == 0 && me == (63 - sh))) {
-        tcg_gen_shli_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], sh);
-    } else if (likely(sh != 0 && me == 63 && sh == (64 - mb))) {
-        tcg_gen_shri_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], mb);
+    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
+
+    if (sh != 0 && mb == 0 && me == (63 - sh)) {
+        tcg_gen_shli_tl(t_ra, t_rs, sh);
+    } else if (sh != 0 && me == 63 && sh == (64 - mb)) {
+        tcg_gen_shri_tl(t_ra, t_rs, mb);
     } else {
-        TCGv t0 = tcg_temp_new();
-        tcg_gen_rotli_tl(t0, cpu_gpr[rS(ctx->opcode)], sh);
-        if (likely(mb == 0 && me == 63)) {
-            tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
-        } else {
-            tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
-        }
-        tcg_temp_free(t0);
+        tcg_gen_rotli_tl(t_ra, t_rs, sh);
+        tcg_gen_andi_tl(t_ra, t_ra, MASK(mb, me));
+    }
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, t_ra);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
+
 /* rldicl - rldicl. */
 static inline void gen_rldicl(DisasContext *ctx, int mbn, int shn)
 {
@@ -1816,6 +1775,7 @@ static inline void gen_rldicl(DisasContext *ctx, int mbn, int shn)
     gen_rldinm(ctx, mb, 63, sh);
 }
 GEN_PPC64_R4(rldicl, 0x1E, 0x00);
+
 /* rldicr - rldicr. */
 static inline void gen_rldicr(DisasContext *ctx, int men, int shn)
 {
@@ -1826,6 +1786,7 @@ static inline void gen_rldicr(DisasContext *ctx, int men, int shn)
     gen_rldinm(ctx, 0, me, sh);
 }
 GEN_PPC64_R4(rldicr, 0x1E, 0x02);
+
 /* rldic - rldic. */
 static inline void gen_rldic(DisasContext *ctx, int mbn, int shn)
 {
@@ -1837,21 +1798,22 @@ static inline void gen_rldic(DisasContext *ctx, int mbn, int shn)
 }
 GEN_PPC64_R4(rldic, 0x1E, 0x04);
 
-static inline void gen_rldnm(DisasContext *ctx, uint32_t mb, uint32_t me)
+static void gen_rldnm(DisasContext *ctx, int mb, int me)
 {
+    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
+    TCGv t_rb = cpu_gpr[rB(ctx->opcode)];
     TCGv t0;
 
     t0 = tcg_temp_new();
-    tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3f);
-    tcg_gen_rotl_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
-    if (unlikely(mb != 0 || me != 63)) {
-        tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
-    } else {
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
-    }
+    tcg_gen_andi_tl(t0, t_rb, 0x3f);
+    tcg_gen_rotl_tl(t_ra, t_rs, t0);
     tcg_temp_free(t0);
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
+
+    tcg_gen_andi_tl(t_ra, t_ra, MASK(mb, me));
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, t_ra);
+    }
 }
 
 /* rldcl - rldcl. */
@@ -1863,6 +1825,7 @@ static inline void gen_rldcl(DisasContext *ctx, int mbn)
     gen_rldnm(ctx, mb, 63);
 }
 GEN_PPC64_R2(rldcl, 0x1E, 0x08);
+
 /* rldcr - rldcr. */
 static inline void gen_rldcr(DisasContext *ctx, int men)
 {
@@ -1872,32 +1835,31 @@ static inline void gen_rldcr(DisasContext *ctx, int men)
     gen_rldnm(ctx, 0, me);
 }
 GEN_PPC64_R2(rldcr, 0x1E, 0x09);
+
 /* rldimi - rldimi. */
-static inline void gen_rldimi(DisasContext *ctx, int mbn, int shn)
+static void gen_rldimi(DisasContext *ctx, int mbn, int shn)
 {
-    uint32_t sh, mb, me;
+    TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
+    TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
+    uint32_t sh = SH(ctx->opcode) | (shn << 5);
+    uint32_t mb = MB(ctx->opcode) | (mbn << 5);
+    uint32_t me = 63 - sh;
 
-    sh = SH(ctx->opcode) | (shn << 5);
-    mb = MB(ctx->opcode) | (mbn << 5);
-    me = 63 - sh;
-    if (unlikely(sh == 0 && mb == 0)) {
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+    if (mb <= me) {
+        tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1);
     } else {
-        TCGv t0, t1;
-        target_ulong mask;
+        target_ulong mask = MASK(mb, me);
+        TCGv t1 = tcg_temp_new();
 
-        t0 = tcg_temp_new();
-        tcg_gen_rotli_tl(t0, cpu_gpr[rS(ctx->opcode)], sh);
-        t1 = tcg_temp_new();
-        mask = MASK(mb, me);
-        tcg_gen_andi_tl(t0, t0, mask);
-        tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask);
-        tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1);
-        tcg_temp_free(t0);
+        tcg_gen_rotli_tl(t1, t_rs, sh);
+        tcg_gen_andi_tl(t1, t1, mask);
+        tcg_gen_andi_tl(t_ra, t_ra, ~mask);
+        tcg_gen_or_tl(t_ra, t_ra, t1);
         tcg_temp_free(t1);
     }
-    if (unlikely(Rc(ctx->opcode) != 0))
-        gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        gen_set_Rc0(ctx, t_ra);
+    }
 }
 GEN_PPC64_R4(rldimi, 0x1E, 0x06);
 #endif
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 954195f5e4..a003c1029d 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -9231,6 +9231,14 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp)
 #if !defined(CONFIG_USER_ONLY)
     cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
         + (cs->cpu_index % smp_threads);
+
+    if (kvm_enabled() && !kvm_vcpu_id_is_valid(cpu->cpu_dt_id)) {
+        error_setg(errp, "Can't create CPU with id %d in KVM", cpu->cpu_dt_id);
+        error_append_hint(errp, "Adjust the number of cpus to %d "
+                          "or try to raise the number of threads per core\n",
+                          cpu->cpu_dt_id * smp_threads / max_smt);
+        return;
+    }
 #endif
 
     if (tcg_enabled()) {
diff --git a/tests/Makefile b/tests/Makefile
index 1bbd1ca463..4bc041c8c7 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -434,13 +434,14 @@ tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o $(test-util-obj-y)
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
 	hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
+	hw/core/bus.o \
 	hw/core/irq.o \
 	hw/core/fw-path-provider.o \
 	$(test-qapi-obj-y)
 tests/test-vmstate$(EXESUF): tests/test-vmstate.o \
-	migration/vmstate.o migration/qemu-file.o migration/qemu-file-buf.o \
-        migration/qemu-file-unix.o migration/qjson.o \
-	$(test-qom-obj-y)
+	migration/vmstate.o migration/qemu-file.o \
+        migration/qemu-file-channel.o migration/qjson.o \
+	$(test-io-obj-y)
 tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \
 	$(test-util-obj-y)
 tests/test-base64$(EXESUF): tests/test-base64.o \
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index b1c542f99b..ed1d9d464c 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -207,33 +207,6 @@ class TestSingleBlockdev(TestSingleDrive):
     test_image_not_found = None
     test_small_buffer2 = None
 
-class TestBlockdevAttached(iotests.QMPTestCase):
-    image_len = 1 * 1024 * 1024 # MB
-
-    def setUp(self):
-        iotests.create_image(backing_img, self.image_len)
-        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
-        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img)
-        self.vm = iotests.VM().add_drive(test_img)
-        self.vm.launch()
-
-    def tearDown(self):
-        self.vm.shutdown()
-        os.remove(test_img)
-        os.remove(target_img)
-
-    def test_blockdev_attached(self):
-        self.assert_no_active_block_jobs()
-        args = {'options':
-                    {'driver': iotests.imgfmt,
-                     'id': 'drive1',
-                     'file': { 'filename': target_img, 'driver': 'file' } } }
-        result = self.vm.qmp("blockdev-add", **args)
-        self.assert_qmp(result, 'return', {})
-        result = self.vm.qmp('blockdev-mirror', device='drive0', sync='full',
-                             target='drive1')
-        self.assert_qmp(result, 'error/class', 'GenericError')
-
 class TestSingleDriveZeroLength(TestSingleDrive):
     image_len = 0
     test_small_buffer2 = None
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index b67d0504a6..b0cadc8245 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-............................................................................
+...........................................................................
 ----------------------------------------------------------------------
-Ran 76 tests
+Ran 75 tests
 
 OK
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index 55fad9507a..828389bb45 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -15,6 +15,7 @@
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
 #include "block/blockjob.h"
+#include "sysemu/block-backend.h"
 
 typedef struct {
     BlockJob common;
@@ -30,7 +31,7 @@ static const BlockJobDriver test_block_job_driver = {
 
 static void test_block_job_complete(BlockJob *job, void *opaque)
 {
-    BlockDriverState *bs = job->bs;
+    BlockDriverState *bs = blk_bs(job->blk);
     int rc = (intptr_t)opaque;
 
     if (block_job_is_cancelled(job)) {
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 5ec966c8a4..c02be805f7 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -578,9 +578,9 @@ static void test_groups(void)
     BlockBackend *blk1, *blk2, *blk3;
     BlockBackendPublic *blkp1, *blkp2, *blkp3;
 
-    blk1 = blk_new_with_bs(&error_abort);
-    blk2 = blk_new_with_bs(&error_abort);
-    blk3 = blk_new_with_bs(&error_abort);
+    blk1 = blk_new();
+    blk2 = blk_new();
+    blk3 = blk_new();
 
     blkp1 = blk_get_public(blk1);
     blkp2 = blk_get_public(blk2);
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 713d4443b2..d19b16a60e 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -29,6 +29,7 @@
 #include "migration/migration.h"
 #include "migration/vmstate.h"
 #include "qemu/coroutine.h"
+#include "io/channel-file.h"
 
 static char temp_file[] = "/tmp/vmst.test.XXXXXX";
 static int temp_fd;
@@ -44,35 +45,22 @@ void yield_until_fd_readable(int fd)
     select(fd + 1, &fds, NULL, NULL, NULL);
 }
 
-/*
- * Some tests use 'open_test_file' to work on a real fd, some use
- * an in memory file (QEMUSizedBuffer+qemu_bufopen); we could pick one
- * but this way we test both.
- */
 
 /* Duplicate temp_fd and seek to the beginning of the file */
 static QEMUFile *open_test_file(bool write)
 {
     int fd = dup(temp_fd);
+    QIOChannel *ioc;
     lseek(fd, 0, SEEK_SET);
     if (write) {
         g_assert_cmpint(ftruncate(fd, 0), ==, 0);
     }
-    return qemu_fdopen(fd, write ? "wb" : "rb");
-}
-
-/*
- * Check that the contents of the memory-buffered file f match
- * the given size/data.
- */
-static void check_mem_file(QEMUFile *f, void *data, size_t size)
-{
-    uint8_t *result = g_malloc(size);
-    const QEMUSizedBuffer *qsb = qemu_buf_get(f);
-    g_assert_cmpint(qsb_get_length(qsb), ==, size);
-    g_assert_cmpint(qsb_get_buffer(qsb, 0, size, result), ==, size);
-    g_assert_cmpint(memcmp(result, data, size), ==, 0);
-    g_free(result);
+    ioc = QIO_CHANNEL(qio_channel_file_new_fd(fd));
+    if (write) {
+        return qemu_fopen_channel_output(ioc);
+    } else {
+        return qemu_fopen_channel_input(ioc);
+    }
 }
 
 #define SUCCESS(val) \
@@ -392,7 +380,7 @@ static const VMStateDescription vmstate_skipping = {
 
 static void test_save_noskip(void)
 {
-    QEMUFile *fsave = qemu_bufopen("w", NULL);
+    QEMUFile *fsave = open_test_file(true);
     TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5, .f = 6,
                        .skip_c_e = false };
     vmstate_save_state(fsave, &vmstate_skipping, &obj, NULL);
@@ -406,13 +394,14 @@ static void test_save_noskip(void)
         0, 0, 0, 5,             /* e */
         0, 0, 0, 0, 0, 0, 0, 6, /* f */
     };
-    check_mem_file(fsave, expected, sizeof(expected));
+
     qemu_fclose(fsave);
+    compare_vmstate(expected, sizeof(expected));
 }
 
 static void test_save_skip(void)
 {
-    QEMUFile *fsave = qemu_bufopen("w", NULL);
+    QEMUFile *fsave = open_test_file(true);
     TestStruct obj = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5, .f = 6,
                        .skip_c_e = true };
     vmstate_save_state(fsave, &vmstate_skipping, &obj, NULL);
@@ -424,13 +413,14 @@ static void test_save_skip(void)
         0, 0, 0, 0, 0, 0, 0, 4, /* d */
         0, 0, 0, 0, 0, 0, 0, 6, /* f */
     };
-    check_mem_file(fsave, expected, sizeof(expected));
 
     qemu_fclose(fsave);
+    compare_vmstate(expected, sizeof(expected));
 }
 
 static void test_load_noskip(void)
 {
+    QEMUFile *fsave = open_test_file(true);
     uint8_t buf[] = {
         0, 0, 0, 10,             /* a */
         0, 0, 0, 20,             /* b */
@@ -440,10 +430,10 @@ static void test_load_noskip(void)
         0, 0, 0, 0, 0, 0, 0, 60, /* f */
         QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
     };
+    qemu_put_buffer(fsave, buf, sizeof(buf));
+    qemu_fclose(fsave);
 
-    QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf));
-    g_assert(qsb);
-    QEMUFile *loading = qemu_bufopen("r", qsb);
+    QEMUFile *loading = open_test_file(false);
     TestStruct obj = { .skip_c_e = false };
     vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
     g_assert(!qemu_file_get_error(loading));
@@ -454,11 +444,11 @@ static void test_load_noskip(void)
     g_assert_cmpint(obj.e, ==, 50);
     g_assert_cmpint(obj.f, ==, 60);
     qemu_fclose(loading);
-    qsb_free(qsb);
 }
 
 static void test_load_skip(void)
 {
+    QEMUFile *fsave = open_test_file(true);
     uint8_t buf[] = {
         0, 0, 0, 10,             /* a */
         0, 0, 0, 20,             /* b */
@@ -466,10 +456,10 @@ static void test_load_skip(void)
         0, 0, 0, 0, 0, 0, 0, 60, /* f */
         QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
     };
+    qemu_put_buffer(fsave, buf, sizeof(buf));
+    qemu_fclose(fsave);
 
-    QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf));
-    g_assert(qsb);
-    QEMUFile *loading = qemu_bufopen("r", qsb);
+    QEMUFile *loading = open_test_file(false);
     TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 };
     vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
     g_assert(!qemu_file_get_error(loading));
@@ -480,13 +470,14 @@ static void test_load_skip(void)
     g_assert_cmpint(obj.e, ==, 500);
     g_assert_cmpint(obj.f, ==, 60);
     qemu_fclose(loading);
-    qsb_free(qsb);
 }
 
 int main(int argc, char **argv)
 {
     temp_fd = mkstemp(temp_file);
 
+    module_call_init(MODULE_INIT_QOM);
+
     g_test_init(&argc, &argv, NULL);
     g_test_add_func("/vmstate/simple/primitive", test_simple_primitive);
     g_test_add_func("/vmstate/versioned/load/v1", test_load_v1);
diff --git a/trace-events b/trace-events
index b53c3541a3..b27d1dab65 100644
--- a/trace-events
+++ b/trace-events
@@ -61,6 +61,10 @@ virtio_console_chr_event(unsigned int port, int event) "port %u, event %d"
 bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 
+# block/block-backend.c
+blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+
 # block/io.c
 bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
@@ -68,8 +72,6 @@ bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x opaque %p"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
-bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
-bdrv_co_readv_no_serialising(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector, int flags) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x"
 bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
@@ -1143,7 +1145,7 @@ win_helper_done(uint32_t tl) "tl=%d"
 win_helper_retry(uint32_t tl) "tl=%d"
 
 # dma-helpers.c
-dma_blk_io(void *dbs, void *bs, int64_t sector_num, bool to_dev) "dbs=%p bs=%p sector_num=%" PRId64 " to_dev=%d"
+dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
 dma_aio_cancel(void *dbs) "dbs=%p"
 dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
 dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"
@@ -1428,7 +1430,7 @@ spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "
 spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64
 spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64
 spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x"
-spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) "liobn=%"PRIx64" tcet=%p table=%p fd=%d"
+spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" table=%p fd=%d"
 
 # hw/ppc/ppc.c
 ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
@@ -1481,7 +1483,7 @@ await_return_path_close_on_source_close(void) ""
 await_return_path_close_on_source_joining(void) ""
 migrate_set_state(int new_state) "new state %d"
 migrate_fd_cleanup(void) ""
-migrate_fd_error(void) ""
+migrate_fd_error(const char *error_desc) "error=%s"
 migrate_fd_cancel(void) ""
 migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at %zx len %zx"
 migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
@@ -1511,6 +1513,8 @@ migrate_state_too_big(void) ""
 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
 process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
 process_incoming_migration_co_postcopy_end_main(void) ""
+migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
+migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname)  "ioc=%p ioctype=%s hostname=%s"
 
 # migration/rdma.c
 qemu_rdma_accept_incoming_migration(void) ""
@@ -1595,6 +1599,27 @@ postcopy_ram_incoming_cleanup_entry(void) ""
 postcopy_ram_incoming_cleanup_exit(void) ""
 postcopy_ram_incoming_cleanup_join(void) ""
 
+# migration/exec.c
+migration_exec_outgoing(const char *cmd) "cmd=%s"
+migration_exec_incoming(const char *cmd) "cmd=%s"
+
+# migration/fd.c
+migration_fd_outgoing(int fd) "fd=%d"
+migration_fd_incoming(int fd) "fd=%d"
+
+# migration/socket.c
+migration_socket_incoming_accepted(void) ""
+migration_socket_outgoing_connected(const char *hostname) "hostname=%s"
+migration_socket_outgoing_error(const char *err) "error=%s"
+
+# migration/tls.c
+migration_tls_outgoing_handshake_start(const char *hostname) "hostname=%s"
+migration_tls_outgoing_handshake_error(const char *err) "err=%s"
+migration_tls_outgoing_handshake_complete(void) ""
+migration_tls_incoming_handshake_start(void) ""
+migration_tls_incoming_handshake_error(const char *err) "err=%s"
+migration_tls_incoming_handshake_complete(void) ""
+
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
@@ -1711,9 +1736,13 @@ vfio_quirk_ati_bonaire_reset_no_smc(const char *name) "%s"
 vfio_quirk_ati_bonaire_reset_timeout(const char *name) "%s"
 vfio_quirk_ati_bonaire_reset_done(const char *name) "%s"
 vfio_quirk_ati_bonaire_reset(const char *name) "%s"
+vfio_pci_igd_bar4_write(const char *name, uint32_t index, uint32_t data, uint32_t base) "%s [%03x] %08x -> %08x"
+vfio_pci_igd_bdsm_enabled(const char *name, int size) "%s %dMB"
+vfio_pci_igd_opregion_enabled(const char *name) "%s"
+vfio_pci_igd_host_bridge_enabled(const char *name) "%s"
+vfio_pci_igd_lpc_bridge_enabled(const char *name) "%s"
 
-
-# hw/vfio/vfio-common.c
+# hw/vfio/common.c
 vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
 vfio_region_read(char *name, int index, uint64_t addr, unsigned size, uint64_t data) " (%s:region%d+0x%"PRIx64", %d) = 0x%"PRIx64
 vfio_iommu_map_notify(uint64_t iova_start, uint64_t iova_end) "iommu map @ %"PRIx64" - %"PRIx64
@@ -1732,6 +1761,9 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg
 vfio_region_exit(const char *name, int index) "Device %s, region %d"
 vfio_region_finalize(const char *name, int index) "Device %s, region %d"
 vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
+vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
+vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
+vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
 
 # hw/vfio/platform.c
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d"
diff --git a/util/error.c b/util/error.c
index cae2511732..9c40b1f458 100644
--- a/util/error.c
+++ b/util/error.c
@@ -217,7 +217,7 @@ ErrorClass error_get_class(const Error *err)
     return err->err_class;
 }
 
-const char *error_get_pretty(Error *err)
+const char *error_get_pretty(const Error *err)
 {
     return err->msg;
 }