summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2016-05-26 14:29:29 +0100
committerPeter Maydell <peter.maydell@linaro.org>2016-05-26 14:29:30 +0100
commit2c56d06bafd8933d2a9c6e0aeb5d45f7c1fb5616 (patch)
tree0afd4cd98a38271bf76f02aee87f0ddfb5a0d0bd
parenta62c89117fa19adc6f9242844468ac31ec535d7e (diff)
parentb75536c9fa742f887304769d0608557bb8e3a27f (diff)
downloadfocaccia-qemu-2c56d06bafd8933d2a9c6e0aeb5d45f7c1fb5616.tar.gz
focaccia-qemu-2c56d06bafd8933d2a9c6e0aeb5d45f7c1fb5616.zip
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Wed 25 May 2016 18:32:40 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (31 commits)
  blockjob: Remove BlockJob.bs
  commit: Use BlockBackend for I/O
  backup: Use BlockBackend for I/O
  backup: Remove bs parameter from backup_do_cow()
  backup: Pack Notifier within BackupBlockJob
  backup: Don't leak BackupBlockJob in error path
  mirror: Use BlockBackend for I/O
  mirror: Allow target that already has a BlockBackend
  stream: Use BlockBackend for I/O
  block: Make blk_co_preadv/pwritev() public
  block: Convert block job core to BlockBackend
  block: Default to enabled write cache in blk_new()
  block: Cancel jobs first in bdrv_close_all()
  block: keep a list of block jobs
  block: Rename blk_write_zeroes()
  dma-helpers: change BlockBackend to opaque value in DMAIOFunc
  dma-helpers: change interface to byte-based
  block: Propagate .drained_begin/end callbacks
  block: Fix reconfiguring graph with drained nodes
  block: Make bdrv_drain() use bdrv_drained_begin/end()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--block.c245
-rw-r--r--block/backup.c71
-rw-r--r--block/block-backend.c123
-rw-r--r--block/commit.c53
-rw-r--r--block/io.c97
-rw-r--r--block/mirror.c100
-rw-r--r--block/parallels.c4
-rw-r--r--block/snapshot.c55
-rw-r--r--block/stream.c15
-rw-r--r--block/vvfat.c8
-rw-r--r--blockdev.c60
-rw-r--r--blockjob.c62
-rw-r--r--dma-helpers.c54
-rw-r--r--hw/block/nvme.c6
-rw-r--r--hw/ide/ahci.c6
-rw-r--r--hw/ide/core.c20
-rw-r--r--hw/ide/internal.h6
-rw-r--r--hw/ide/macio.c2
-rw-r--r--hw/scsi/scsi-disk.c8
-rw-r--r--include/block/block.h24
-rw-r--r--include/block/block_int.h3
-rw-r--r--include/block/blockjob.h23
-rw-r--r--include/sysemu/block-backend.h23
-rw-r--r--include/sysemu/dma.h20
-rw-r--r--migration/block.c4
-rw-r--r--monitor.c4
-rw-r--r--qemu-img.c6
-rw-r--r--qemu-io-cmds.c22
-rw-r--r--qmp.c5
-rwxr-xr-xtests/qemu-iotests/04127
-rw-r--r--tests/qemu-iotests/041.out4
-rw-r--r--tests/test-blockjob-txn.c3
-rw-r--r--tests/test-throttle.c6
-rw-r--r--trace-events8
34 files changed, 603 insertions, 574 deletions
diff --git a/block.c b/block.c
index 1205ef8860..736432f67e 100644
--- a/block.c
+++ b/block.c
@@ -64,16 +64,16 @@ static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
-static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
-                             const char *reference, QDict *options, int flags,
-                             BlockDriverState *parent,
-                             const BdrvChildRole *child_role, Error **errp);
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+                                           const char *reference,
+                                           QDict *options, int flags,
+                                           BlockDriverState *parent,
+                                           const BdrvChildRole *child_role,
+                                           Error **errp);
 
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
-static void bdrv_close(BlockDriverState *bs);
-
 #ifdef _WIN32
 static int is_windows_drive_prefix(const char *filename)
 {
@@ -220,11 +220,6 @@ void bdrv_register(BlockDriver *bdrv)
     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
 
-BlockDriverState *bdrv_new_root(void)
-{
-    return bdrv_new();
-}
-
 BlockDriverState *bdrv_new(void)
 {
     BlockDriverState *bs;
@@ -664,6 +659,18 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
     return 0;
 }
 
+static void bdrv_child_cb_drained_begin(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    bdrv_drained_begin(bs);
+}
+
+static void bdrv_child_cb_drained_end(BdrvChild *child)
+{
+    BlockDriverState *bs = child->opaque;
+    bdrv_drained_end(bs);
+}
+
 /*
  * Returns the options and flags that a temporary snapshot should get, based on
  * the originally requested flags (the originally requested image will have
@@ -710,6 +717,8 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
 
 const BdrvChildRole child_file = {
     .inherit_options = bdrv_inherited_options,
+    .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_end     = bdrv_child_cb_drained_end,
 };
 
 /*
@@ -728,6 +737,8 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
 
 const BdrvChildRole child_format = {
     .inherit_options = bdrv_inherited_fmt_options,
+    .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_end     = bdrv_child_cb_drained_end,
 };
 
 /*
@@ -755,6 +766,8 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
 
 static const BdrvChildRole child_backing = {
     .inherit_options = bdrv_backing_options,
+    .drained_begin   = bdrv_child_cb_drained_begin,
+    .drained_end     = bdrv_child_cb_drained_end,
 };
 
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
@@ -1155,18 +1168,41 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     return 0;
 }
 
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+{
+    BlockDriverState *old_bs = child->bs;
+
+    if (old_bs) {
+        if (old_bs->quiesce_counter && child->role->drained_end) {
+            child->role->drained_end(child);
+        }
+        QLIST_REMOVE(child, next_parent);
+    }
+
+    child->bs = new_bs;
+
+    if (new_bs) {
+        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
+        if (new_bs->quiesce_counter && child->role->drained_begin) {
+            child->role->drained_begin(child);
+        }
+    }
+}
+
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
-                                  const BdrvChildRole *child_role)
+                                  const BdrvChildRole *child_role,
+                                  void *opaque)
 {
     BdrvChild *child = g_new(BdrvChild, 1);
     *child = (BdrvChild) {
-        .bs     = child_bs,
+        .bs     = NULL,
         .name   = g_strdup(child_name),
         .role   = child_role,
+        .opaque = opaque,
     };
 
-    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
+    bdrv_replace_child(child, child_bs);
 
     return child;
 }
@@ -1176,7 +1212,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              const char *child_name,
                              const BdrvChildRole *child_role)
 {
-    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
+    BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
+                                              parent_bs);
     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
     return child;
 }
@@ -1187,7 +1224,9 @@ static void bdrv_detach_child(BdrvChild *child)
         QLIST_REMOVE(child, next);
         child->next.le_prev = NULL;
     }
-    QLIST_REMOVE(child, next_parent);
+
+    bdrv_replace_child(child, NULL);
+
     g_free(child->name);
     g_free(child);
 }
@@ -1341,14 +1380,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
     }
 
-    backing_hd = NULL;
-    ret = bdrv_open_inherit(&backing_hd,
-                            *backing_filename ? backing_filename : NULL,
-                            reference, options, 0, bs, &child_backing,
-                            errp);
-    if (ret < 0) {
+    backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
+                                   reference, options, 0, bs, &child_backing,
+                                   errp);
+    if (!backing_hd) {
         bs->open_flags |= BDRV_O_NO_BACKING;
         error_prepend(errp, "Could not open backing file: ");
+        ret = -EINVAL;
         goto free_exit;
     }
 
@@ -1388,7 +1426,6 @@ BdrvChild *bdrv_open_child(const char *filename,
     BdrvChild *c = NULL;
     BlockDriverState *bs;
     QDict *image_options;
-    int ret;
     char *bdref_key_dot;
     const char *reference;
 
@@ -1408,10 +1445,9 @@ BdrvChild *bdrv_open_child(const char *filename,
         goto done;
     }
 
-    bs = NULL;
-    ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
-                            parent, child_role, errp);
-    if (ret < 0) {
+    bs = bdrv_open_inherit(filename, reference, image_options, 0,
+                           parent, child_role, errp);
+    if (!bs) {
         goto done;
     }
 
@@ -1422,15 +1458,16 @@ done:
     return c;
 }
 
-static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
-                                     QDict *snapshot_options, Error **errp)
+static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
+                                                   int flags,
+                                                   QDict *snapshot_options,
+                                                   Error **errp)
 {
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char *tmp_filename = g_malloc0(PATH_MAX + 1);
     int64_t total_size;
     QemuOpts *opts = NULL;
     BlockDriverState *bs_snapshot;
-    Error *local_err = NULL;
     int ret;
 
     /* if snapshot, we create a temporary backing file and open it
@@ -1439,7 +1476,6 @@ static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
     /* Get the required size from the image */
     total_size = bdrv_getlength(bs);
     if (total_size < 0) {
-        ret = total_size;
         error_setg_errno(errp, -total_size, "Could not get image size");
         goto out;
     }
@@ -1470,22 +1506,26 @@ static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
     qdict_put(snapshot_options, "driver",
               qstring_from_str("qcow2"));
 
-    bs_snapshot = bdrv_new();
-
-    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
-                    flags, &local_err);
+    bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
     snapshot_options = NULL;
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    if (!bs_snapshot) {
+        ret = -EINVAL;
         goto out;
     }
 
+    /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will
+     * call bdrv_unref() on it), so in order to be able to return one, we have
+     * to increase bs_snapshot's refcount here */
+    bdrv_ref(bs_snapshot);
     bdrv_append(bs_snapshot, bs);
 
+    g_free(tmp_filename);
+    return bs_snapshot;
+
 out:
     QDECREF(snapshot_options);
     g_free(tmp_filename);
-    return ret;
+    return NULL;
 }
 
 /*
@@ -1503,10 +1543,12 @@ out:
  * should be opened. If specified, neither options nor a filename may be given,
  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
  */
-static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
-                             const char *reference, QDict *options, int flags,
-                             BlockDriverState *parent,
-                             const BdrvChildRole *child_role, Error **errp)
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+                                           const char *reference,
+                                           QDict *options, int flags,
+                                           BlockDriverState *parent,
+                                           const BdrvChildRole *child_role,
+                                           Error **errp)
 {
     int ret;
     BdrvChild *file = NULL;
@@ -1518,7 +1560,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     QDict *snapshot_options = NULL;
     int snapshot_flags = 0;
 
-    assert(pbs);
     assert(!child_role || !flags);
     assert(!child_role == !parent);
 
@@ -1526,33 +1567,22 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         bool options_non_empty = options ? qdict_size(options) : false;
         QDECREF(options);
 
-        if (*pbs) {
-            error_setg(errp, "Cannot reuse an existing BDS when referencing "
-                       "another block device");
-            return -EINVAL;
-        }
-
         if (filename || options_non_empty) {
             error_setg(errp, "Cannot reference an existing block device with "
                        "additional options or a new filename");
-            return -EINVAL;
+            return NULL;
         }
 
         bs = bdrv_lookup_bs(reference, reference, errp);
         if (!bs) {
-            return -ENODEV;
+            return NULL;
         }
 
         bdrv_ref(bs);
-        *pbs = bs;
-        return 0;
+        return bs;
     }
 
-    if (*pbs) {
-        bs = *pbs;
-    } else {
-        bs = bdrv_new();
-    }
+    bs = bdrv_new();
 
     /* NULL means an empty set of options */
     if (options == NULL) {
@@ -1562,7 +1592,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     /* json: syntax counts as explicit options, as if in the QDict */
     parse_json_protocol(options, &filename, &local_err);
     if (local_err) {
-        ret = -EINVAL;
         goto fail;
     }
 
@@ -1589,7 +1618,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         drv = bdrv_find_format(drvname);
         if (!drv) {
             error_setg(errp, "Unknown driver: '%s'", drvname);
-            ret = -EINVAL;
             goto fail;
         }
     }
@@ -1619,7 +1647,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         file = bdrv_open_child(filename, options, "file", bs,
                                &child_file, true, &local_err);
         if (local_err) {
-            ret = -EINVAL;
             goto fail;
         }
     }
@@ -1646,7 +1673,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         qdict_put(options, "driver", qstring_from_str(drv->format_name));
     } else if (!drv) {
         error_setg(errp, "Must specify either driver or file");
-        ret = -EINVAL;
         goto fail;
     }
 
@@ -1689,7 +1715,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                        drv->format_name, entry->key);
         }
 
-        ret = -EINVAL;
         goto close_and_fail;
     }
 
@@ -1700,25 +1725,30 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
         error_setg(errp,
                    "Guest must be stopped for opening of encrypted image");
-        ret = -EBUSY;
         goto close_and_fail;
     }
 
     QDECREF(options);
-    *pbs = bs;
 
     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
      * temporary snapshot afterwards. */
     if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
-                                        &local_err);
+        BlockDriverState *snapshot_bs;
+        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
+                                                snapshot_options, &local_err);
         snapshot_options = NULL;
         if (local_err) {
             goto close_and_fail;
         }
+        /* We are not going to return bs but the overlay on top of it
+         * (snapshot_bs); thus, we have to drop the strong reference to bs
+         * (which we obtained by calling bdrv_new()). bs will not be deleted,
+         * though, because the overlay still has a reference to it. */
+        bdrv_unref(bs);
+        bs = snapshot_bs;
     }
 
-    return 0;
+    return bs;
 
 fail:
     if (file != NULL) {
@@ -1729,36 +1759,26 @@ fail:
     QDECREF(bs->options);
     QDECREF(options);
     bs->options = NULL;
-    if (!*pbs) {
-        /* If *pbs is NULL, a new BDS has been created in this function and
-           needs to be freed now. Otherwise, it does not need to be closed,
-           since it has not really been opened yet. */
-        bdrv_unref(bs);
-    }
+    bdrv_unref(bs);
     if (local_err) {
         error_propagate(errp, local_err);
     }
-    return ret;
+    return NULL;
 
 close_and_fail:
-    /* See fail path, but now the BDS has to be always closed */
-    if (*pbs) {
-        bdrv_close(bs);
-    } else {
-        bdrv_unref(bs);
-    }
+    bdrv_unref(bs);
     QDECREF(snapshot_options);
     QDECREF(options);
     if (local_err) {
         error_propagate(errp, local_err);
     }
-    return ret;
+    return NULL;
 }
 
-int bdrv_open(BlockDriverState **pbs, const char *filename,
-              const char *reference, QDict *options, int flags, Error **errp)
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+                            QDict *options, int flags, Error **errp)
 {
-    return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
+    return bdrv_open_inherit(filename, reference, options, flags, NULL,
                              NULL, errp);
 }
 
@@ -2132,6 +2152,7 @@ static void bdrv_close(BlockDriverState *bs)
     BdrvAioNotifier *ban, *ban_next;
 
     assert(!bs->job);
+    assert(!bs->refcnt);
 
     bdrv_drained_begin(bs); /* complete I/O */
     bdrv_flush(bs);
@@ -2140,8 +2161,6 @@ static void bdrv_close(BlockDriverState *bs)
     bdrv_release_named_dirty_bitmaps(bs);
     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
-    bdrv_parent_cb_change_media(bs, false);
-
     if (bs->drv) {
         BdrvChild *child, *next;
 
@@ -2190,8 +2209,7 @@ static void bdrv_close(BlockDriverState *bs)
 
 void bdrv_close_all(void)
 {
-    BlockDriverState *bs;
-    AioContext *aio_context;
+    block_job_cancel_sync_all();
 
     /* Drop references from requests still in flight, such as canceled block
      * jobs whose AIO context has not been polled yet */
@@ -2200,25 +2218,7 @@ void bdrv_close_all(void)
     blk_remove_all_bs();
     blockdev_close_all_bdrv_states();
 
-    /* Cancel all block jobs */
-    while (!QTAILQ_EMPTY(&all_bdrv_states)) {
-        QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
-            aio_context = bdrv_get_aio_context(bs);
-
-            aio_context_acquire(aio_context);
-            if (bs->job) {
-                block_job_cancel_sync(bs->job);
-                aio_context_release(aio_context);
-                break;
-            }
-            aio_context_release(aio_context);
-        }
-
-        /* All the remaining BlockDriverStates are referenced directly or
-         * indirectly from block jobs, so there needs to be at least one BDS
-         * directly used by a block job */
-        assert(bs);
-    }
+    assert(QTAILQ_EMPTY(&all_bdrv_states));
 }
 
 static void change_parent_backing_link(BlockDriverState *from,
@@ -2228,10 +2228,8 @@ static void change_parent_backing_link(BlockDriverState *from,
 
     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
         assert(c->role != &child_backing);
-        c->bs = to;
-        QLIST_REMOVE(c, next_parent);
-        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
         bdrv_ref(to);
+        bdrv_replace_child(c, to);
         bdrv_unref(from);
     }
 }
@@ -3195,9 +3193,9 @@ void bdrv_invalidate_cache_all(Error **errp)
 {
     BlockDriverState *bs;
     Error *local_err = NULL;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -3239,11 +3237,11 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
 int bdrv_inactivate_all(void)
 {
     BlockDriverState *bs = NULL;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
     int ret = 0;
     int pass;
 
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         aio_context_acquire(bdrv_get_aio_context(bs));
     }
 
@@ -3252,8 +3250,7 @@ int bdrv_inactivate_all(void)
      * the second pass sets the BDRV_O_INACTIVE flag so that no further write
      * is allowed. */
     for (pass = 0; pass < 2; pass++) {
-        it = NULL;
-        while ((it = bdrv_next(it, &bs)) != NULL) {
+        for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
             ret = bdrv_inactivate_recurse(bs, pass);
             if (ret < 0) {
                 goto out;
@@ -3262,8 +3259,7 @@ int bdrv_inactivate_all(void)
     }
 
 out:
-    it = NULL;
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         aio_context_release(bdrv_get_aio_context(bs));
     }
 
@@ -3547,11 +3543,10 @@ void bdrv_img_create(const char *filename, const char *fmt,
                           qstring_from_str(backing_fmt));
             }
 
-            bs = NULL;
-            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
-                            back_flags, &local_err);
+            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
+                           &local_err);
             g_free(full_backing);
-            if (ret < 0) {
+            if (!bs) {
                 goto out;
             }
             size = bdrv_getlength(bs);
@@ -3753,10 +3748,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 {
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
     /* walk down the bs forest recursively */
-    while ((it = bdrv_next(it, &bs)) != NULL) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         bool perm;
 
         /* try to recurse in this top level bs */
diff --git a/block/backup.c b/block/backup.c
index fec45e8212..feeb9f8bf2 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -36,7 +36,7 @@ typedef struct CowRequest {
 
 typedef struct BackupBlockJob {
     BlockJob common;
-    BlockDriverState *target;
+    BlockBackend *target;
     /* bitmap for sync=incremental */
     BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
     uint64_t sectors_read;
     unsigned long *done_bitmap;
     int64_t cluster_size;
+    NotifierWithReturn before_write;
     QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
 
@@ -93,12 +94,12 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }
 
-static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                       int64_t sector_num, int nb_sectors,
                                       bool *error_is_read,
                                       bool is_write_notifier)
 {
-    BackupBlockJob *job = (BackupBlockJob *)bs->job;
+    BlockBackend *blk = job->common.blk;
     CowRequest cow_request;
     struct iovec iov;
     QEMUIOVector bounce_qiov;
@@ -131,20 +132,15 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
                 start * sectors_per_cluster);
 
         if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, job->cluster_size);
+            bounce_buffer = blk_blockalign(blk, job->cluster_size);
         }
         iov.iov_base = bounce_buffer;
         iov.iov_len = n * BDRV_SECTOR_SIZE;
         qemu_iovec_init_external(&bounce_qiov, &iov, 1);
 
-        if (is_write_notifier) {
-            ret = bdrv_co_readv_no_serialising(bs,
-                                           start * sectors_per_cluster,
-                                           n, &bounce_qiov);
-        } else {
-            ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
-                                &bounce_qiov);
-        }
+        ret = blk_co_preadv(blk, start * job->cluster_size,
+                            bounce_qiov.size, &bounce_qiov,
+                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
         if (ret < 0) {
             trace_backup_do_cow_read_fail(job, start, ret);
             if (error_is_read) {
@@ -154,13 +150,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
         }
 
         if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = bdrv_co_write_zeroes(job->target,
-                                       start * sectors_per_cluster,
-                                       n, BDRV_REQ_MAY_UNMAP);
+            ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
+                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
         } else {
-            ret = bdrv_co_writev(job->target,
-                                 start * sectors_per_cluster, n,
-                                 &bounce_qiov);
+            ret = blk_co_pwritev(job->target, start * job->cluster_size,
+                                 bounce_qiov.size, &bounce_qiov, 0);
         }
         if (ret < 0) {
             trace_backup_do_cow_write_fail(job, start, ret);
@@ -197,14 +191,16 @@ static int coroutine_fn backup_before_write_notify(
         NotifierWithReturn *notifier,
         void *opaque)
 {
+    BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
     BdrvTrackedRequest *req = opaque;
     int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
     int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
 
+    assert(req->bs == blk_bs(job->common.blk));
     assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
     assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
+    return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
 }
 
 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -221,7 +217,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 {
     BdrvDirtyBitmap *bm;
-    BlockDriverState *bs = job->common.bs;
+    BlockDriverState *bs = blk_bs(job->common.blk);
 
     if (ret < 0 || block_job_is_cancelled(&job->common)) {
         /* Merge the successor back into the parent, delete nothing. */
@@ -279,7 +275,7 @@ static void backup_complete(BlockJob *job, void *opaque)
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
     BackupCompleteData *data = opaque;
 
-    bdrv_unref(s->target);
+    blk_unref(s->target);
 
     block_job_completed(job, data->ret);
     g_free(data);
@@ -321,7 +317,6 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
     int64_t end;
     int64_t last_cluster = -1;
     int64_t sectors_per_cluster = cluster_size_sectors(job);
-    BlockDriverState *bs = job->common.bs;
     HBitmapIter hbi;
 
     granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
@@ -343,7 +338,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                 if (yield_and_check(job)) {
                     return ret;
                 }
-                ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+                ret = backup_do_cow(job, cluster * sectors_per_cluster,
                                     sectors_per_cluster, &error_is_read,
                                     false);
                 if ((ret < 0) &&
@@ -376,11 +371,8 @@ static void coroutine_fn backup_run(void *opaque)
 {
     BackupBlockJob *job = opaque;
     BackupCompleteData *data;
-    BlockDriverState *bs = job->common.bs;
-    BlockDriverState *target = job->target;
-    NotifierWithReturn before_write = {
-        .notify = backup_before_write_notify,
-    };
+    BlockDriverState *bs = blk_bs(job->common.blk);
+    BlockBackend *target = job->target;
     int64_t start, end;
     int64_t sectors_per_cluster = cluster_size_sectors(job);
     int ret = 0;
@@ -393,7 +385,8 @@ static void coroutine_fn backup_run(void *opaque)
 
     job->done_bitmap = bitmap_new(end);
 
-    bdrv_add_before_write_notifier(bs, &before_write);
+    job->before_write.notify = backup_before_write_notify;
+    bdrv_add_before_write_notifier(bs, &job->before_write);
 
     if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
         while (!block_job_is_cancelled(&job->common)) {
@@ -445,7 +438,7 @@ static void coroutine_fn backup_run(void *opaque)
                 }
             }
             /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * sectors_per_cluster,
+            ret = backup_do_cow(job, start * sectors_per_cluster,
                                 sectors_per_cluster, &error_is_read, false);
             if (ret < 0) {
                 /* Depending on error action, fail now or retry cluster */
@@ -461,14 +454,14 @@ static void coroutine_fn backup_run(void *opaque)
         }
     }
 
-    notifier_with_return_remove(&before_write);
+    notifier_with_return_remove(&job->before_write);
 
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
     g_free(job->done_bitmap);
 
-    bdrv_op_unblock_all(target, job->common.blocker);
+    bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -485,6 +478,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
 {
     int64_t len;
     BlockDriverInfo bdi;
+    BackupBlockJob *job = NULL;
     int ret;
 
     assert(bs);
@@ -542,15 +536,16 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         goto error;
     }
 
-    BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
-                                           cb, opaque, errp);
+    job = block_job_create(&backup_job_driver, bs, speed, cb, opaque, errp);
     if (!job) {
         goto error;
     }
 
+    job->target = blk_new();
+    blk_insert_bs(job->target, target);
+
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
-    job->target = target;
     job->sync_mode = sync_mode;
     job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
                        sync_bitmap : NULL;
@@ -558,7 +553,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     /* If there is no backing file on the target, we cannot rely on COW if our
      * backup cluster size is smaller than the target cluster size. Even for
      * targets with a backing file, try to avoid COW if possible. */
-    ret = bdrv_get_info(job->target, &bdi);
+    ret = bdrv_get_info(target, &bdi);
     if (ret < 0 && !target->backing) {
         error_setg_errno(errp, -ret,
             "Couldn't determine the cluster size of the target image, "
@@ -584,4 +579,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     if (sync_bitmap) {
         bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
     }
+    if (job) {
+        blk_unref(job->target);
+        block_job_unref(&job->common);
+    }
 }
diff --git a/block/block-backend.c b/block/block-backend.c
index 6928d61de4..34500e6080 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -19,6 +19,7 @@
 #include "sysemu/sysemu.h"
 #include "qapi-event.h"
 #include "qemu/id.h"
+#include "trace.h"
 
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
@@ -119,12 +120,14 @@ static const BdrvChildRole child_root = {
  * Store an error through @errp on failure, unless it's null.
  * Return the new BlockBackend on success, null on failure.
  */
-BlockBackend *blk_new(Error **errp)
+BlockBackend *blk_new(void)
 {
     BlockBackend *blk;
 
     blk = g_new0(BlockBackend, 1);
     blk->refcnt = 1;
+    blk_set_enable_write_cache(blk, true);
+
     qemu_co_queue_init(&blk->public.throttled_reqs[0]);
     qemu_co_queue_init(&blk->public.throttled_reqs[1]);
 
@@ -136,27 +139,7 @@ BlockBackend *blk_new(Error **errp)
 }
 
 /*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(Error **errp)
-{
-    BlockBackend *blk;
-    BlockDriverState *bs;
-
-    blk = blk_new(errp);
-    if (!blk) {
-        return NULL;
-    }
-
-    bs = bdrv_new_root();
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    blk->root->opaque = blk;
-    return blk;
-}
-
-/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
+ * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
  *
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
@@ -171,21 +154,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp)
 {
     BlockBackend *blk;
-    int ret;
-
-    blk = blk_new_with_bs(errp);
-    if (!blk) {
-        QDECREF(options);
-        return NULL;
-    }
+    BlockDriverState *bs;
 
-    ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
-    if (ret < 0) {
+    blk = blk_new();
+    bs = bdrv_open(filename, reference, options, flags, errp);
+    if (!bs) {
         blk_unref(blk);
         return NULL;
     }
 
-    blk_set_enable_write_cache(blk, true);
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
 
     return blk;
 }
@@ -286,25 +264,11 @@ BlockBackend *blk_next(BlockBackend *blk)
                : QTAILQ_FIRST(&monitor_block_backends);
 }
 
-struct BdrvNextIterator {
-    enum {
-        BDRV_NEXT_BACKEND_ROOTS,
-        BDRV_NEXT_MONITOR_OWNED,
-    } phase;
-    BlockBackend *blk;
-    BlockDriverState *bs;
-};
-
 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
  * the monitor or attached to a BlockBackend */
-BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
+BlockDriverState *bdrv_next(BdrvNextIterator *it)
 {
-    if (!it) {
-        it = g_new(BdrvNextIterator, 1);
-        *it = (BdrvNextIterator) {
-            .phase = BDRV_NEXT_BACKEND_ROOTS,
-        };
-    }
+    BlockDriverState *bs;
 
     /* First, return all root nodes of BlockBackends. In order to avoid
      * returning a BDS twice when multiple BBs refer to it, we only return it
@@ -312,11 +276,11 @@ BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
         do {
             it->blk = blk_all_next(it->blk);
-            *bs = it->blk ? blk_bs(it->blk) : NULL;
-        } while (it->blk && (*bs == NULL || bdrv_first_blk(*bs) != it->blk));
+            bs = it->blk ? blk_bs(it->blk) : NULL;
+        } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
 
-        if (*bs) {
-            return it;
+        if (bs) {
+            return bs;
         }
         it->phase = BDRV_NEXT_MONITOR_OWNED;
     }
@@ -326,10 +290,19 @@ BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs)
      * by the above block already */
     do {
         it->bs = bdrv_next_monitor_owned(it->bs);
-        *bs = it->bs;
-    } while (*bs && bdrv_has_blk(*bs));
+        bs = it->bs;
+    } while (bs && bdrv_has_blk(bs));
 
-    return *bs ? it : NULL;
+    return bs;
+}
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+    *it = (BdrvNextIterator) {
+        .phase = BDRV_NEXT_BACKEND_ROOTS,
+    };
+
+    return bdrv_next(it);
 }
 
 /*
@@ -509,8 +482,7 @@ void blk_remove_bs(BlockBackend *blk)
 void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
 {
     bdrv_ref(bs);
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    blk->root->opaque = blk;
+    blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
 
     notifier_list_notify(&blk->insert_bs_notifiers, blk);
     if (blk->public.throttle_state) {
@@ -770,11 +742,15 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num,
                                   nb_sectors * BDRV_SECTOR_SIZE);
 }
 
-static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                                      unsigned int bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags)
 {
-    int ret = blk_check_byte_request(blk, offset, bytes);
+    int ret;
+
+    trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+
+    ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
     }
@@ -787,12 +763,14 @@ static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
     return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
 }
 
-static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                                      unsigned int bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                                unsigned int bytes, QEMUIOVector *qiov,
+                                BdrvRequestFlags flags)
 {
     int ret;
 
+    trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+
     ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
@@ -885,8 +863,8 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
     return ret;
 }
 
-int blk_write_zeroes(BlockBackend *blk, int64_t offset,
-                     int count, BdrvRequestFlags flags)
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                      int count, BdrvRequestFlags flags)
 {
     return blk_prw(blk, offset, NULL, count, blk_write_entry,
                    flags | BDRV_REQ_ZERO_WRITE);
@@ -1001,9 +979,9 @@ static void blk_aio_write_entry(void *opaque)
     blk_aio_complete(acb);
 }
 
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
-                                 int count, BdrvRequestFlags flags,
-                                 BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                  int count, BdrvRequestFlags flags,
+                                  BlockCompletionFunc *cb, void *opaque)
 {
     return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
                         flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
@@ -1492,8 +1470,8 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
     return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
 }
 
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
-                                     int count, BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                      int count, BdrvRequestFlags flags)
 {
     return blk_co_pwritev(blk, offset, count, NULL,
                           flags | BDRV_REQ_ZERO_WRITE);
@@ -1704,6 +1682,9 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
 
+    /* Note that blk->root may not be accessible here yet if we are just
+     * attaching to a BlockDriverState that is drained. Use child instead. */
+
     if (blk->public.io_limits_disabled++ == 0) {
         throttle_group_restart_blk(blk);
     }
diff --git a/block/commit.c b/block/commit.c
index f308c8c6f0..8a00e1146c 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,28 +36,36 @@ typedef struct CommitBlockJob {
     BlockJob common;
     RateLimit limit;
     BlockDriverState *active;
-    BlockDriverState *top;
-    BlockDriverState *base;
+    BlockBackend *top;
+    BlockBackend *base;
     BlockdevOnError on_error;
     int base_flags;
     int orig_overlay_flags;
     char *backing_file_str;
 } CommitBlockJob;
 
-static int coroutine_fn commit_populate(BlockDriverState *bs,
-                                        BlockDriverState *base,
+static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
                                         int64_t sector_num, int nb_sectors,
                                         void *buf)
 {
     int ret = 0;
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = buf,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+    };
 
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    if (ret) {
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
+                        qiov.size, &qiov, 0);
+    if (ret < 0) {
         return ret;
     }
 
-    ret = bdrv_write(base, sector_num, buf, nb_sectors);
-    if (ret) {
+    ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
+                         qiov.size, &qiov, 0);
+    if (ret < 0) {
         return ret;
     }
 
@@ -73,8 +81,8 @@ static void commit_complete(BlockJob *job, void *opaque)
     CommitBlockJob *s = container_of(job, CommitBlockJob, common);
     CommitCompleteData *data = opaque;
     BlockDriverState *active = s->active;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
+    BlockDriverState *top = blk_bs(s->top);
+    BlockDriverState *base = blk_bs(s->base);
     BlockDriverState *overlay_bs;
     int ret = data->ret;
 
@@ -94,6 +102,8 @@ static void commit_complete(BlockJob *job, void *opaque)
         bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
     }
     g_free(s->backing_file_str);
+    blk_unref(s->top);
+    blk_unref(s->base);
     block_job_completed(&s->common, ret);
     g_free(data);
 }
@@ -102,8 +112,6 @@ static void coroutine_fn commit_run(void *opaque)
 {
     CommitBlockJob *s = opaque;
     CommitCompleteData *data;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
     int64_t sector_num, end;
     int ret = 0;
     int n = 0;
@@ -111,27 +119,27 @@ static void coroutine_fn commit_run(void *opaque)
     int bytes_written = 0;
     int64_t base_len;
 
-    ret = s->common.len = bdrv_getlength(top);
+    ret = s->common.len = blk_getlength(s->top);
 
 
     if (s->common.len < 0) {
         goto out;
     }
 
-    ret = base_len = bdrv_getlength(base);
+    ret = base_len = blk_getlength(s->base);
     if (base_len < 0) {
         goto out;
     }
 
     if (base_len < s->common.len) {
-        ret = bdrv_truncate(base, s->common.len);
+        ret = blk_truncate(s->base, s->common.len);
         if (ret) {
             goto out;
         }
     }
 
     end = s->common.len >> BDRV_SECTOR_BITS;
-    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
 
     for (sector_num = 0; sector_num < end; sector_num += n) {
         uint64_t delay_ns = 0;
@@ -146,7 +154,8 @@ wait:
             break;
         }
         /* Copy if allocated above the base */
-        ret = bdrv_is_allocated_above(top, base, sector_num,
+        ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
+                                      sector_num,
                                       COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
                                       &n);
         copy = (ret == 1);
@@ -158,7 +167,7 @@ wait:
                     goto wait;
                 }
             }
-            ret = commit_populate(top, base, sector_num, n, buf);
+            ret = commit_populate(s->top, s->base, sector_num, n, buf);
             bytes_written += n * BDRV_SECTOR_SIZE;
         }
         if (ret < 0) {
@@ -253,8 +262,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
         return;
     }
 
-    s->base   = base;
-    s->top    = top;
+    s->base = blk_new();
+    blk_insert_bs(s->base, base);
+
+    s->top = blk_new();
+    blk_insert_bs(s->top, top);
+
     s->active = bs;
 
     s->base_flags          = orig_base_flags;
diff --git a/block/io.c b/block/io.c
index 60a6bd8bdb..2d832aa532 100644
--- a/block/io.c
+++ b/block/io.c
@@ -225,6 +225,34 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
     assert(data.done);
 }
 
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+    if (!bs->quiesce_counter++) {
+        aio_disable_external(bdrv_get_aio_context(bs));
+        bdrv_parent_drained_begin(bs);
+    }
+
+    bdrv_io_unplugged_begin(bs);
+    bdrv_drain_recurse(bs);
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(bs);
+    } else {
+        bdrv_drain_poll(bs);
+    }
+    bdrv_io_unplugged_end(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+    assert(bs->quiesce_counter > 0);
+    if (--bs->quiesce_counter > 0) {
+        return;
+    }
+
+    bdrv_parent_drained_end(bs);
+    aio_enable_external(bdrv_get_aio_context(bs));
+}
+
 /*
  * Wait for pending requests to complete on a single BlockDriverState subtree,
  * and suspend block driver's internal I/O until next request arrives.
@@ -238,26 +266,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
  */
 void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
 {
-    bdrv_parent_drained_begin(bs);
-    bdrv_io_unplugged_begin(bs);
-    bdrv_drain_recurse(bs);
-    bdrv_co_yield_to_drain(bs);
-    bdrv_io_unplugged_end(bs);
-    bdrv_parent_drained_end(bs);
+    assert(qemu_in_coroutine());
+    bdrv_drained_begin(bs);
+    bdrv_drained_end(bs);
 }
 
 void bdrv_drain(BlockDriverState *bs)
 {
-    bdrv_parent_drained_begin(bs);
-    bdrv_io_unplugged_begin(bs);
-    bdrv_drain_recurse(bs);
-    if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs);
-    } else {
-        bdrv_drain_poll(bs);
-    }
-    bdrv_io_unplugged_end(bs);
-    bdrv_parent_drained_end(bs);
+    bdrv_drained_begin(bs);
+    bdrv_drained_end(bs);
 }
 
 /*
@@ -271,10 +288,10 @@ void bdrv_drain_all(void)
     /* Always run first iteration so any pending completion BHs run */
     bool busy = true;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
     GSList *aio_ctxs = NULL, *ctx;
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -302,10 +319,9 @@ void bdrv_drain_all(void)
 
         for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
             AioContext *aio_context = ctx->data;
-            it = NULL;
 
             aio_context_acquire(aio_context);
-            while ((it = bdrv_next(it, &bs))) {
+            for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                 if (aio_context == bdrv_get_aio_context(bs)) {
                     if (bdrv_requests_pending(bs)) {
                         busy = true;
@@ -318,8 +334,7 @@ void bdrv_drain_all(void)
         }
     }
 
-    it = NULL;
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
@@ -1093,24 +1108,6 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
     return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
 }
 
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
-                            BDRV_REQ_NO_SERIALISING);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
-                            BDRV_REQ_COPY_ON_READ);
-}
-
 #define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
 
 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
@@ -2543,23 +2540,3 @@ void bdrv_io_unplugged_end(BlockDriverState *bs)
         }
     }
 }
-
-void bdrv_drained_begin(BlockDriverState *bs)
-{
-    if (!bs->quiesce_counter++) {
-        aio_disable_external(bdrv_get_aio_context(bs));
-    }
-    bdrv_parent_drained_begin(bs);
-    bdrv_drain(bs);
-}
-
-void bdrv_drained_end(BlockDriverState *bs)
-{
-    bdrv_parent_drained_end(bs);
-
-    assert(bs->quiesce_counter > 0);
-    if (--bs->quiesce_counter > 0) {
-        return;
-    }
-    aio_enable_external(bdrv_get_aio_context(bs));
-}
diff --git a/block/mirror.c b/block/mirror.c
index b9986d8218..80fd3c7469 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -20,7 +20,6 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
-#include "qemu/error-report.h"
 
 #define SLICE_TIME    100000000ULL /* ns */
 #define MAX_IN_FLIGHT 16
@@ -36,7 +35,7 @@ typedef struct MirrorBuffer {
 typedef struct MirrorBlockJob {
     BlockJob common;
     RateLimit limit;
-    BlockDriverState *target;
+    BlockBackend *target;
     BlockDriverState *base;
     /* The name of the graph node to replace */
     char *replaces;
@@ -157,7 +156,8 @@ static void mirror_read_complete(void *opaque, int ret)
         mirror_iteration_done(op, ret);
         return;
     }
-    bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+    blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+                    op->nb_sectors * BDRV_SECTOR_SIZE,
                     mirror_write_complete, op);
 }
 
@@ -186,7 +186,7 @@ static int mirror_cow_align(MirrorBlockJob *s,
     need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
                           s->cow_bitmap);
     if (need_cow) {
-        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+        bdrv_round_to_clusters(blk_bs(s->target), *sector_num, *nb_sectors,
                                &align_sector_num, &align_nb_sectors);
     }
 
@@ -224,7 +224,7 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
 static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
                           int nb_sectors)
 {
-    BlockDriverState *source = s->common.bs;
+    BlockBackend *source = s->common.blk;
     int sectors_per_chunk, nb_chunks;
     int ret = nb_sectors;
     MirrorOp *op;
@@ -274,7 +274,8 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
-    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+    blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+                   nb_sectors * BDRV_SECTOR_SIZE,
                    mirror_read_complete, op);
     return ret;
 }
@@ -296,10 +297,11 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
     s->in_flight++;
     s->sectors_in_flight += nb_sectors;
     if (is_discard) {
-        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
-                         mirror_write_complete, op);
+        blk_aio_discard(s->target, sector_num, op->nb_sectors,
+                        mirror_write_complete, op);
     } else {
-        bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
+        blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
+                              op->nb_sectors * BDRV_SECTOR_SIZE,
                               s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                               mirror_write_complete, op);
     }
@@ -307,7 +309,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
 
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
-    BlockDriverState *source = s->common.bs;
+    BlockDriverState *source = blk_bs(s->common.blk);
     int64_t sector_num, first_chunk;
     uint64_t delay_ns = 0;
     /* At least the first dirty chunk is mirrored in one iteration. */
@@ -384,7 +386,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
             int64_t target_sector_num;
             int target_nb_sectors;
-            bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+            bdrv_round_to_clusters(blk_bs(s->target), sector_num, io_sectors,
                                    &target_sector_num, &target_nb_sectors);
             if (target_sector_num == sector_num &&
                 target_nb_sectors == io_sectors) {
@@ -449,7 +451,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
     MirrorExitData *data = opaque;
     AioContext *replace_aio_context = NULL;
-    BlockDriverState *src = s->common.bs;
+    BlockDriverState *src = blk_bs(s->common.blk);
+    BlockDriverState *target_bs = blk_bs(s->target);
 
     /* Make sure that the source BDS doesn't go away before we called
      * block_job_completed(). */
@@ -461,26 +464,25 @@ static void mirror_exit(BlockJob *job, void *opaque)
     }
 
     if (s->should_complete && data->ret == 0) {
-        BlockDriverState *to_replace = s->common.bs;
+        BlockDriverState *to_replace = src;
         if (s->to_replace) {
             to_replace = s->to_replace;
         }
 
-        /* This was checked in mirror_start_job(), but meanwhile one of the
-         * nodes could have been newly attached to a BlockBackend. */
-        if (bdrv_has_blk(to_replace) && bdrv_has_blk(s->target)) {
-            error_report("block job: Can't create node with two BlockBackends");
-            data->ret = -EINVAL;
-            goto out;
+        if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
         }
 
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
-            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
-        }
-        bdrv_replace_in_backing_chain(to_replace, s->target);
-    }
+        /* The mirror job has no requests in flight any more, but we need to
+         * drain potential other users of the BDS before changing the graph. */
+        bdrv_drained_begin(target_bs);
+        bdrv_replace_in_backing_chain(to_replace, target_bs);
+        bdrv_drained_end(target_bs);
 
-out:
+        /* We just changed the BDS the job BB refers to */
+        blk_remove_bs(job->blk);
+        blk_insert_bs(job->blk, src);
+    }
     if (s->to_replace) {
         bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
         error_free(s->replace_blocker);
@@ -490,8 +492,8 @@ out:
         aio_context_release(replace_aio_context);
     }
     g_free(s->replaces);
-    bdrv_op_unblock_all(s->target, s->common.blocker);
-    bdrv_unref(s->target);
+    bdrv_op_unblock_all(target_bs, s->common.blocker);
+    blk_unref(s->target);
     block_job_completed(&s->common, data->ret);
     g_free(data);
     bdrv_drained_end(src);
@@ -505,7 +507,8 @@ static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     MirrorExitData *data;
-    BlockDriverState *bs = s->common.bs;
+    BlockDriverState *bs = blk_bs(s->common.blk);
+    BlockDriverState *target_bs = blk_bs(s->target);
     int64_t sector_num, end, length;
     uint64_t last_pause_ns;
     BlockDriverInfo bdi;
@@ -541,18 +544,18 @@ static void coroutine_fn mirror_run(void *opaque)
      * the destination do COW.  Instead, we copy sectors around the
      * dirty data if needed.  We need a bitmap to do that.
      */
-    bdrv_get_backing_filename(s->target, backing_filename,
+    bdrv_get_backing_filename(target_bs, backing_filename,
                               sizeof(backing_filename));
-    if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
+    if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
         target_cluster_size = bdi.cluster_size;
     }
-    if (backing_filename[0] && !s->target->backing
+    if (backing_filename[0] && !target_bs->backing
         && s->granularity < target_cluster_size) {
         s->buf_size = MAX(s->buf_size, target_cluster_size);
         s->cow_bitmap = bitmap_new(length);
     }
     s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
-    s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
+    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
 
     end = s->bdev_length / BDRV_SECTOR_SIZE;
     s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -567,7 +570,7 @@ static void coroutine_fn mirror_run(void *opaque)
     if (!s->is_none_mode) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
         BlockDriverState *base = s->base;
-        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
+        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(target_bs);
 
         for (sector_num = 0; sector_num < end; ) {
             /* Just to make sure we are not exceeding int limit. */
@@ -637,7 +640,7 @@ static void coroutine_fn mirror_run(void *opaque)
         should_complete = false;
         if (s->in_flight == 0 && cnt == 0) {
             trace_mirror_before_flush(s);
-            ret = bdrv_flush(s->target);
+            ret = blk_flush(s->target);
             if (ret < 0) {
                 if (mirror_error_action(s, false, -ret) ==
                     BLOCK_ERROR_ACTION_REPORT) {
@@ -715,7 +718,7 @@ immediate_exit:
     data->ret = ret;
     /* Before we switch to target in mirror_exit, make sure data doesn't
      * change. */
-    bdrv_drained_begin(s->common.bs);
+    bdrv_drained_begin(bs);
     if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
         /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
          * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
@@ -742,7 +745,8 @@ static void mirror_complete(BlockJob *job, Error **errp)
     Error *local_err = NULL;
     int ret;
 
-    ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err);
+    ret = bdrv_open_backing_file(blk_bs(s->target), NULL, "backing",
+                                 &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return;
@@ -804,7 +808,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                              bool is_none_mode, BlockDriverState *base)
 {
     MirrorBlockJob *s;
-    BlockDriverState *replaced_bs;
 
     if (granularity == 0) {
         granularity = bdrv_get_default_bitmap_granularity(target);
@@ -821,30 +824,17 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
         buf_size = DEFAULT_MIRROR_BUF_SIZE;
     }
 
-    /* We can't support this case as long as the block layer can't handle
-     * multiple BlockBackends per BlockDriverState. */
-    if (replaces) {
-        replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
-        if (replaced_bs == NULL) {
-            return;
-        }
-    } else {
-        replaced_bs = bs;
-    }
-    if (bdrv_has_blk(replaced_bs) && bdrv_has_blk(target)) {
-        error_setg(errp, "Can't create node with two BlockBackends");
-        return;
-    }
-
     s = block_job_create(driver, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
     }
 
+    s->target = blk_new();
+    blk_insert_bs(s->target, target);
+
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
     s->on_target_error = on_target_error;
-    s->target = target;
     s->is_none_mode = is_none_mode;
     s->base = base;
     s->granularity = granularity;
@@ -854,11 +844,12 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
     if (!s->dirty_bitmap) {
         g_free(s->replaces);
+        blk_unref(s->target);
         block_job_unref(&s->common);
         return;
     }
 
-    bdrv_op_block_all(s->target, s->common.blocker);
+    bdrv_op_block_all(target, s->common.blocker);
 
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
@@ -931,7 +922,6 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
         }
     }
 
-    bdrv_ref(base);
     mirror_start_job(bs, base, NULL, speed, 0, 0,
                      on_error, on_error, false, cb, opaque, &local_err,
                      &commit_active_job_driver, false, base);
diff --git a/block/parallels.c b/block/parallels.c
index 88cfacebe3..99fc0f77ef 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -517,8 +517,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
     if (ret < 0) {
         goto exit;
     }
-    ret = blk_write_zeroes(file, BDRV_SECTOR_SIZE,
-                           (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
+    ret = blk_pwrite_zeroes(file, BDRV_SECTOR_SIZE,
+                            (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
     if (ret < 0) {
         goto exit;
     }
diff --git a/block/snapshot.c b/block/snapshot.c
index 3917ec5c91..6e6e34fcf4 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -374,9 +374,9 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
 {
     bool ok = true;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (ok && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -384,8 +384,12 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
             ok = bdrv_can_snapshot(bs);
         }
         aio_context_release(ctx);
+        if (!ok) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return ok;
 }
@@ -395,20 +399,27 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
 {
     int ret = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
     QEMUSnapshotInfo sn1, *snapshot = &sn1;
 
-    while (ret == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
         if (bdrv_can_snapshot(bs) &&
                 bdrv_snapshot_find(bs, snapshot, name) >= 0) {
             ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
+            if (ret < 0) {
+                goto fail;
+            }
         }
         aio_context_release(ctx);
+        if (ret < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return ret;
 }
@@ -418,9 +429,9 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
 {
     int err = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (err == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -428,8 +439,12 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
             err = bdrv_snapshot_goto(bs, name);
         }
         aio_context_release(ctx);
+        if (err < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return err;
 }
@@ -439,9 +454,9 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
     QEMUSnapshotInfo sn;
     int err = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (err == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -449,8 +464,12 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
             err = bdrv_snapshot_find(bs, &sn, name);
         }
         aio_context_release(ctx);
+        if (err < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return err;
 }
@@ -462,9 +481,9 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
 {
     int err = 0;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (err == 0 && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
@@ -476,24 +495,32 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
             err = bdrv_snapshot_create(bs, sn);
         }
         aio_context_release(ctx);
+        if (err < 0) {
+            goto fail;
+        }
     }
 
+fail:
     *first_bad_bs = bs;
     return err;
 }
 
 BlockDriverState *bdrv_all_find_vmstate_bs(void)
 {
-    bool not_found = true;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while (not_found && (it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *ctx = bdrv_get_aio_context(bs);
+        bool found;
 
         aio_context_acquire(ctx);
-        not_found = !bdrv_can_snapshot(bs);
+        found = bdrv_can_snapshot(bs);
         aio_context_release(ctx);
+
+        if (found) {
+            break;
+        }
     }
     return bs;
 }
diff --git a/block/stream.c b/block/stream.c
index 40aa32212e..c0efbda34e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -39,7 +39,7 @@ typedef struct StreamBlockJob {
     char *backing_file_str;
 } StreamBlockJob;
 
-static int coroutine_fn stream_populate(BlockDriverState *bs,
+static int coroutine_fn stream_populate(BlockBackend *blk,
                                         int64_t sector_num, int nb_sectors,
                                         void *buf)
 {
@@ -52,7 +52,8 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
     qemu_iovec_init_external(&qiov, &iov, 1);
 
     /* Copy-on-read the unallocated clusters */
-    return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
+    return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov,
+                         BDRV_REQ_COPY_ON_READ);
 }
 
 typedef struct {
@@ -64,6 +65,7 @@ static void stream_complete(BlockJob *job, void *opaque)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common);
     StreamCompleteData *data = opaque;
+    BlockDriverState *bs = blk_bs(job->blk);
     BlockDriverState *base = s->base;
 
     if (!block_job_is_cancelled(&s->common) && data->reached_end &&
@@ -75,8 +77,8 @@ static void stream_complete(BlockJob *job, void *opaque)
                 base_fmt = base->drv->format_name;
             }
         }
-        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        bdrv_set_backing_hd(job->bs, base);
+        data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+        bdrv_set_backing_hd(bs, base);
     }
 
     g_free(s->backing_file_str);
@@ -88,7 +90,8 @@ static void coroutine_fn stream_run(void *opaque)
 {
     StreamBlockJob *s = opaque;
     StreamCompleteData *data;
-    BlockDriverState *bs = s->common.bs;
+    BlockBackend *blk = s->common.blk;
+    BlockDriverState *bs = blk_bs(blk);
     BlockDriverState *base = s->base;
     int64_t sector_num = 0;
     int64_t end = -1;
@@ -159,7 +162,7 @@ wait:
                     goto wait;
                 }
             }
-            ret = stream_populate(bs, sector_num, n, buf);
+            ret = stream_populate(blk, sector_num, n, buf);
         }
         if (ret < 0) {
             BlockErrorAction action =
diff --git a/block/vvfat.c b/block/vvfat.c
index 3e484a1dcc..a39dbe67e2 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2998,12 +2998,12 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
         goto err;
     }
 
-    s->qcow = NULL;
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow"));
-    ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
-    if (ret < 0) {
+    s->qcow = bdrv_open(s->qcow_filename, NULL, options,
+                        BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
+    if (!s->qcow) {
+        ret = -EINVAL;
         goto err;
     }
 
diff --git a/blockdev.c b/blockdev.c
index 40e4e6fc6f..717785eb8d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -567,11 +567,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     if ((!file || !*file) && !qdict_size(bs_opts)) {
         BlockBackendRootState *blk_rs;
 
-        blk = blk_new(errp);
-        if (!blk) {
-            goto early_err;
-        }
-
+        blk = blk_new();
         blk_rs = blk_get_root_state(blk);
         blk_rs->open_flags    = bdrv_flags;
         blk_rs->read_only     = !(bdrv_flags & BDRV_O_RDWR);
@@ -657,7 +653,6 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
     QemuOpts *opts;
     Error *local_error = NULL;
     BlockdevDetectZeroesOptions detect_zeroes;
-    int ret;
     int bdrv_flags = 0;
 
     opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
@@ -688,9 +683,8 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
         bdrv_flags |= BDRV_O_INACTIVE;
     }
 
-    bs = NULL;
-    ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
-    if (ret < 0) {
+    bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+    if (!bs) {
         goto fail_no_bs_opts;
     }
 
@@ -1643,7 +1637,7 @@ typedef struct ExternalSnapshotState {
 static void external_snapshot_prepare(BlkActionState *common,
                                       Error **errp)
 {
-    int flags = 0, ret;
+    int flags = 0;
     QDict *options = NULL;
     Error *local_err = NULL;
     /* Device and node name of the image to generate the snapshot from */
@@ -1768,11 +1762,10 @@ static void external_snapshot_prepare(BlkActionState *common,
         flags |= BDRV_O_NO_BACKING;
     }
 
-    assert(state->new_bs == NULL);
-    ret = bdrv_open(&state->new_bs, new_image_file, snapshot_ref, options,
-                    flags, errp);
+    state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
+                              errp);
     /* We will manually add the backing_hd field to the bs later */
-    if (ret != 0) {
+    if (!state->new_bs) {
         return;
     }
 
@@ -2540,7 +2533,7 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
 {
     BlockBackend *blk;
     BlockDriverState *medium_bs = NULL;
-    int bdrv_flags, ret;
+    int bdrv_flags;
     QDict *options = NULL;
     Error *err = NULL;
 
@@ -2584,9 +2577,8 @@ void qmp_blockdev_change_medium(const char *device, const char *filename,
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    assert(!medium_bs);
-    ret = bdrv_open(&medium_bs, filename, NULL, options, bdrv_flags, errp);
-    if (ret < 0) {
+    medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
+    if (!medium_bs) {
         goto fail;
     }
 
@@ -3199,7 +3191,6 @@ static void do_drive_backup(const char *device, const char *target,
     Error *local_err = NULL;
     int flags;
     int64_t size;
-    int ret;
 
     if (!has_speed) {
         speed = 0;
@@ -3283,10 +3274,8 @@ static void do_drive_backup(const char *device, const char *target,
         qdict_put(options, "driver", qstring_from_str(format));
     }
 
-    target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, options, flags, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    target_bs = bdrv_open(target, NULL, options, flags, errp);
+    if (!target_bs) {
         goto out;
     }
 
@@ -3304,8 +3293,8 @@ static void do_drive_backup(const char *device, const char *target,
     backup_start(bs, target_bs, speed, sync, bmap,
                  on_source_error, on_target_error,
                  block_job_cb, bs, txn, &local_err);
+    bdrv_unref(target_bs);
     if (local_err != NULL) {
-        bdrv_unref(target_bs);
         error_propagate(errp, local_err);
         goto out;
     }
@@ -3389,12 +3378,10 @@ void do_blockdev_backup(const char *device, const char *target,
     }
     target_bs = blk_bs(target_blk);
 
-    bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
     backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
                  on_target_error, block_job_cb, bs, txn, &local_err);
     if (local_err != NULL) {
-        bdrv_unref(target_bs);
         error_propagate(errp, local_err);
     }
 out:
@@ -3470,10 +3457,6 @@ static void blockdev_mirror_common(BlockDriverState *bs,
     if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) {
         return;
     }
-    if (bdrv_has_blk(target)) {
-        error_setg(errp, "Cannot mirror to an attached block device");
-        return;
-    }
 
     if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
         sync = MIRROR_SYNC_MODE_FULL;
@@ -3511,7 +3494,6 @@ void qmp_drive_mirror(const char *device, const char *target,
     QDict *options = NULL;
     int flags;
     int64_t size;
-    int ret;
 
     blk = blk_by_name(device);
     if (!blk) {
@@ -3620,11 +3602,9 @@ void qmp_drive_mirror(const char *device, const char *target,
     /* Mirroring takes care of copy-on-write using the source's backing
      * file.
      */
-    target_bs = NULL;
-    ret = bdrv_open(&target_bs, target, NULL, options,
-                    flags | BDRV_O_NO_BACKING, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    target_bs = bdrv_open(target, NULL, options, flags | BDRV_O_NO_BACKING,
+                          errp);
+    if (!target_bs) {
         goto out;
     }
 
@@ -3639,9 +3619,9 @@ void qmp_drive_mirror(const char *device, const char *target,
                            has_on_target_error, on_target_error,
                            has_unmap, unmap,
                            &local_err);
+    bdrv_unref(target_bs);
     if (local_err) {
         error_propagate(errp, local_err);
-        bdrv_unref(target_bs);
     }
 out:
     aio_context_release(aio_context);
@@ -3685,7 +3665,6 @@ void qmp_blockdev_mirror(const char *device, const char *target,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
 
     blockdev_mirror_common(bs, target_bs,
@@ -3699,7 +3678,6 @@ void qmp_blockdev_mirror(const char *device, const char *target,
                            &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        bdrv_unref(target_bs);
     }
 
     aio_context_release(aio_context);
@@ -4164,9 +4142,9 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
     BlockJobInfoList *head = NULL, **p_next = &head;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
diff --git a/blockjob.c b/blockjob.c
index 5b840a7df6..c095cc57cb 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -50,17 +50,31 @@ struct BlockJobTxn {
     int refcnt;
 };
 
+static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
+
+BlockJob *block_job_next(BlockJob *job)
+{
+    if (!job) {
+        return QLIST_FIRST(&block_jobs);
+    }
+    return QLIST_NEXT(job, job_list);
+}
+
 void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
                        int64_t speed, BlockCompletionFunc *cb,
                        void *opaque, Error **errp)
 {
+    BlockBackend *blk;
     BlockJob *job;
 
     if (bs->job) {
         error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
         return NULL;
     }
-    bdrv_ref(bs);
+
+    blk = blk_new();
+    blk_insert_bs(blk, bs);
+
     job = g_malloc0(driver->instance_size);
     error_setg(&job->blocker, "block device is in use by block job: %s",
                BlockJobType_lookup[driver->job_type]);
@@ -69,13 +83,15 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
 
     job->driver        = driver;
     job->id            = g_strdup(bdrv_get_device_name(bs));
-    job->bs            = bs;
+    job->blk           = blk;
     job->cb            = cb;
     job->opaque        = opaque;
     job->busy          = true;
     job->refcnt        = 1;
     bs->job = job;
 
+    QLIST_INSERT_HEAD(&block_jobs, job, job_list);
+
     /* Only set speed when necessary to avoid NotSupported error */
     if (speed != 0) {
         Error *local_err = NULL;
@@ -98,11 +114,13 @@ void block_job_ref(BlockJob *job)
 void block_job_unref(BlockJob *job)
 {
     if (--job->refcnt == 0) {
-        job->bs->job = NULL;
-        bdrv_op_unblock_all(job->bs, job->blocker);
-        bdrv_unref(job->bs);
+        BlockDriverState *bs = blk_bs(job->blk);
+        bs->job = NULL;
+        bdrv_op_unblock_all(bs, job->blocker);
+        blk_unref(job->blk);
         error_free(job->blocker);
         g_free(job->id);
+        QLIST_REMOVE(job, job_list);
         g_free(job);
     }
 }
@@ -140,7 +158,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
     txn->aborting = true;
     /* We are the first failed job. Cancel other jobs. */
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
-        ctx = bdrv_get_aio_context(other_job->bs);
+        ctx = blk_get_aio_context(other_job->blk);
         aio_context_acquire(ctx);
     }
     QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
@@ -157,7 +175,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
         assert(other_job->completed);
     }
     QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-        ctx = bdrv_get_aio_context(other_job->bs);
+        ctx = blk_get_aio_context(other_job->blk);
         block_job_completed_single(other_job);
         aio_context_release(ctx);
     }
@@ -179,7 +197,7 @@ static void block_job_completed_txn_success(BlockJob *job)
     }
     /* We are the last completed job, commit the transaction. */
     QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-        ctx = bdrv_get_aio_context(other_job->bs);
+        ctx = blk_get_aio_context(other_job->blk);
         aio_context_acquire(ctx);
         assert(other_job->ret == 0);
         block_job_completed_single(other_job);
@@ -189,9 +207,7 @@ static void block_job_completed_txn_success(BlockJob *job)
 
 void block_job_completed(BlockJob *job, int ret)
 {
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
+    assert(blk_bs(job->blk)->job == job);
     assert(!job->completed);
     job->completed = true;
     job->ret = ret;
@@ -282,11 +298,10 @@ static int block_job_finish_sync(BlockJob *job,
                                  void (*finish)(BlockJob *, Error **errp),
                                  Error **errp)
 {
-    BlockDriverState *bs = job->bs;
     Error *local_err = NULL;
     int ret;
 
-    assert(bs->job == job);
+    assert(blk_bs(job->blk)->job == job);
 
     block_job_ref(job);
     finish(job, &local_err);
@@ -297,7 +312,7 @@ static int block_job_finish_sync(BlockJob *job,
     }
     while (!job->completed) {
         aio_poll(job->deferred_to_main_loop ? qemu_get_aio_context() :
-                                              bdrv_get_aio_context(bs),
+                                              blk_get_aio_context(job->blk),
                  true);
     }
     ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
@@ -318,6 +333,19 @@ int block_job_cancel_sync(BlockJob *job)
     return block_job_finish_sync(job, &block_job_cancel_err, NULL);
 }
 
+void block_job_cancel_sync_all(void)
+{
+    BlockJob *job;
+    AioContext *aio_context;
+
+    while ((job = QLIST_FIRST(&block_jobs))) {
+        aio_context = blk_get_aio_context(job->blk);
+        aio_context_acquire(aio_context);
+        block_job_cancel_sync(job);
+        aio_context_release(aio_context);
+    }
+}
+
 int block_job_complete_sync(BlockJob *job, Error **errp)
 {
     return block_job_finish_sync(job, &block_job_complete, errp);
@@ -336,7 +364,7 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns)
     if (block_job_is_paused(job)) {
         qemu_coroutine_yield();
     } else {
-        co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
+        co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
     }
     job->busy = true;
 }
@@ -465,7 +493,7 @@ static void block_job_defer_to_main_loop_bh(void *opaque)
     aio_context_acquire(data->aio_context);
 
     /* Fetch BDS AioContext again, in case it has changed */
-    aio_context = bdrv_get_aio_context(data->job->bs);
+    aio_context = blk_get_aio_context(data->job->blk);
     aio_context_acquire(aio_context);
 
     data->job->deferred_to_main_loop = false;
@@ -485,7 +513,7 @@ void block_job_defer_to_main_loop(BlockJob *job,
     BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
     data->job = job;
     data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
-    data->aio_context = bdrv_get_aio_context(job->bs);
+    data->aio_context = blk_get_aio_context(job->blk);
     data->fn = fn;
     data->opaque = opaque;
     job->deferred_to_main_loop = true;
diff --git a/dma-helpers.c b/dma-helpers.c
index a6cc15f534..b521d84ebd 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -70,7 +70,7 @@ void qemu_sglist_destroy(QEMUSGList *qsg)
 
 typedef struct {
     BlockAIOCB common;
-    BlockBackend *blk;
+    AioContext *ctx;
     BlockAIOCB *acb;
     QEMUSGList *sg;
     uint64_t offset;
@@ -80,6 +80,7 @@ typedef struct {
     QEMUIOVector iov;
     QEMUBH *bh;
     DMAIOFunc *io_func;
+    void *io_func_opaque;
 } DMAAIOCB;
 
 static void dma_blk_cb(void *opaque, int ret);
@@ -154,8 +155,7 @@ static void dma_blk_cb(void *opaque, int ret)
 
     if (dbs->iov.size == 0) {
         trace_dma_map_wait(dbs);
-        dbs->bh = aio_bh_new(blk_get_aio_context(dbs->blk),
-                             reschedule_dma, dbs);
+        dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
         cpu_register_map_client(dbs->bh);
         return;
     }
@@ -164,8 +164,8 @@ static void dma_blk_cb(void *opaque, int ret)
         qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
     }
 
-    dbs->acb = dbs->io_func(dbs->blk, dbs->offset, &dbs->iov, 0,
-                            dma_blk_cb, dbs);
+    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
+                            dma_blk_cb, dbs, dbs->io_func_opaque);
     assert(dbs->acb);
 }
 
@@ -191,23 +191,25 @@ static const AIOCBInfo dma_aiocb_info = {
     .cancel_async       = dma_aio_cancel,
 };
 
-BlockAIOCB *dma_blk_io(
-    BlockBackend *blk, QEMUSGList *sg, uint64_t sector_num,
-    DMAIOFunc *io_func, BlockCompletionFunc *cb,
+BlockAIOCB *dma_blk_io(AioContext *ctx,
+    QEMUSGList *sg, uint64_t offset,
+    DMAIOFunc *io_func, void *io_func_opaque,
+    BlockCompletionFunc *cb,
     void *opaque, DMADirection dir)
 {
-    DMAAIOCB *dbs = blk_aio_get(&dma_aiocb_info, blk, cb, opaque);
+    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
 
-    trace_dma_blk_io(dbs, blk, sector_num, (dir == DMA_DIRECTION_TO_DEVICE));
+    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
 
     dbs->acb = NULL;
-    dbs->blk = blk;
     dbs->sg = sg;
-    dbs->offset = sector_num << BDRV_SECTOR_BITS;
+    dbs->ctx = ctx;
+    dbs->offset = offset;
     dbs->sg_cur_index = 0;
     dbs->sg_cur_byte = 0;
     dbs->dir = dir;
     dbs->io_func = io_func;
+    dbs->io_func_opaque = io_func_opaque;
     dbs->bh = NULL;
     qemu_iovec_init(&dbs->iov, sg->nsg);
     dma_blk_cb(dbs, 0);
@@ -215,19 +217,39 @@ BlockAIOCB *dma_blk_io(
 }
 
 
+static
+BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
+                                 BlockCompletionFunc *cb, void *cb_opaque,
+                                 void *opaque)
+{
+    BlockBackend *blk = opaque;
+    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
+}
+
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
-                         QEMUSGList *sg, uint64_t sector,
+                         QEMUSGList *sg, uint64_t offset,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk, sg, sector, blk_aio_preadv, cb, opaque,
+    return dma_blk_io(blk_get_aio_context(blk),
+                      sg, offset, dma_blk_read_io_func, blk, cb, opaque,
                       DMA_DIRECTION_FROM_DEVICE);
 }
 
+static
+BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
+                                  BlockCompletionFunc *cb, void *cb_opaque,
+                                  void *opaque)
+{
+    BlockBackend *blk = opaque;
+    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
+}
+
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
-                          QEMUSGList *sg, uint64_t sector,
+                          QEMUSGList *sg, uint64_t offset,
                           void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk, sg, sector, blk_aio_pwritev, cb, opaque,
+    return dma_blk_io(blk_get_aio_context(blk),
+                      sg, offset, dma_blk_write_io_func, blk, cb, opaque,
                       DMA_DIRECTION_TO_DEVICE);
 }
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 173988ee84..9faad29fad 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -239,7 +239,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
     uint64_t data_size = (uint64_t)nlb << data_shift;
-    uint64_t aio_slba  = slba << (data_shift - BDRV_SECTOR_BITS);
+    uint64_t data_offset = slba << data_shift;
     int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
     enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
 
@@ -258,8 +258,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     req->has_sg = true;
     dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
     req->aiocb = is_write ?
-        dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) :
-        dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req);
+        dma_blk_write(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req) :
+        dma_blk_read(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req);
 
     return NVME_NO_COMPLETE;
 }
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index f244bc01c9..502d4f1c7b 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1006,7 +1006,8 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs)
         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
                        &ncq_tfs->sglist, BLOCK_ACCT_READ);
         ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist,
-                                      ncq_tfs->lba, ncq_cb, ncq_tfs);
+                                      ncq_tfs->lba << BDRV_SECTOR_BITS,
+                                      ncq_cb, ncq_tfs);
         break;
     case WRITE_FPDMA_QUEUED:
         DPRINTF(port, "NCQ writing %d sectors to LBA %"PRId64", tag %d\n",
@@ -1018,7 +1019,8 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs)
         dma_acct_start(ide_state->blk, &ncq_tfs->acct,
                        &ncq_tfs->sglist, BLOCK_ACCT_WRITE);
         ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist,
-                                       ncq_tfs->lba, ncq_cb, ncq_tfs);
+                                       ncq_tfs->lba << BDRV_SECTOR_BITS,
+                                       ncq_cb, ncq_tfs);
         break;
     default:
         DPRINTF(port, "error: unsupported NCQ command (0x%02x) received\n",
diff --git a/hw/ide/core.c b/hw/ide/core.c
index fe2bfba489..029f6b9b12 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -441,13 +441,14 @@ static void ide_issue_trim_cb(void *opaque, int ret)
     }
 }
 
-BlockAIOCB *ide_issue_trim(BlockBackend *blk,
-        int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags,
-        BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *ide_issue_trim(
+        int64_t offset, QEMUIOVector *qiov,
+        BlockCompletionFunc *cb, void *cb_opaque, void *opaque)
 {
+    BlockBackend *blk = opaque;
     TrimAIOCB *iocb;
 
-    iocb = blk_aio_get(&trim_aiocb_info, blk, cb, opaque);
+    iocb = blk_aio_get(&trim_aiocb_info, blk, cb, cb_opaque);
     iocb->blk = blk;
     iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
     iocb->ret = 0;
@@ -799,6 +800,7 @@ static void ide_dma_cb(void *opaque, int ret)
     IDEState *s = opaque;
     int n;
     int64_t sector_num;
+    uint64_t offset;
     bool stay_active = false;
 
     if (ret == -ECANCELED) {
@@ -859,18 +861,20 @@ static void ide_dma_cb(void *opaque, int ret)
         return;
     }
 
+    offset = sector_num << BDRV_SECTOR_BITS;
     switch (s->dma_cmd) {
     case IDE_DMA_READ:
-        s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, sector_num,
+        s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, offset,
                                           ide_dma_cb, s);
         break;
     case IDE_DMA_WRITE:
-        s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, sector_num,
+        s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, offset,
                                            ide_dma_cb, s);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(s->blk, &s->sg, sector_num,
-                                        ide_issue_trim, ide_dma_cb, s,
+        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
+                                        &s->sg, offset,
+                                        ide_issue_trim, s->blk, ide_dma_cb, s,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
     default:
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index ceb9e5994a..773928af77 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -613,9 +613,9 @@ void ide_transfer_start(IDEState *s, uint8_t *buf, int size,
                         EndTransferFunc *end_transfer_func);
 void ide_transfer_stop(IDEState *s);
 void ide_set_inactive(IDEState *s, bool more);
-BlockAIOCB *ide_issue_trim(BlockBackend *blk,
-        int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags,
-        BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *ide_issue_trim(
+        int64_t offset, QEMUIOVector *qiov,
+        BlockCompletionFunc *cb, void *cb_opaque, void *opaque);
 BlockAIOCB *ide_buffered_readv(IDEState *s, int64_t sector_num,
                                QEMUIOVector *iov, int nb_sectors,
                                BlockCompletionFunc *cb, void *opaque);
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index d7d9c0ff3a..42ad68a1c0 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -230,7 +230,7 @@ static void pmac_dma_trim(BlockBackend *blk,
     s->io_buffer_index += io->len;
     io->len = 0;
 
-    s->bus->dma->aiocb = ide_issue_trim(blk, offset, &io->iov, 0, cb, io);
+    s->bus->dma->aiocb = ide_issue_trim(offset, &io->iov, cb, io, blk);
 }
 
 static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index ce89c98b4e..8865da53e8 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -335,7 +335,8 @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.resid -= r->req.sg->size;
-        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg, r->sector,
+        r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg,
+                                    r->sector << BDRV_SECTOR_BITS,
                                     scsi_dma_complete, r);
     } else {
         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
@@ -539,7 +540,8 @@ static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.resid -= r->req.sg->size;
-        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg, r->sector,
+        r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg,
+                                     r->sector << BDRV_SECTOR_BITS,
                                      scsi_dma_complete, r);
     } else {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
@@ -1778,7 +1780,7 @@ static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
                          nb_sectors * s->qdev.blocksize,
                         BLOCK_ACCT_WRITE);
-        r->req.aiocb = blk_aio_write_zeroes(s->qdev.conf.blk,
+        r->req.aiocb = blk_aio_pwrite_zeroes(s->qdev.conf.blk,
                                 r->req.cmd.lba * s->qdev.blocksize,
                                 nb_sectors * s->qdev.blocksize,
                                 flags, scsi_aio_complete, r);
diff --git a/include/block/block.h b/include/block/block.h
index a8c15e36e7..70ea29947c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -17,7 +17,6 @@ typedef struct BlockJob BlockJob;
 typedef struct BdrvChild BdrvChild;
 typedef struct BdrvChildRole BdrvChildRole;
 typedef struct BlockJobTxn BlockJobTxn;
-typedef struct BdrvNextIterator BdrvNextIterator;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
@@ -198,7 +197,6 @@ BlockDriver *bdrv_find_format(const char *format_name);
 int bdrv_create(BlockDriver *drv, const char* filename,
                 QemuOpts *opts, Error **errp);
 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-BlockDriverState *bdrv_new_root(void);
 BlockDriverState *bdrv_new(void);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 void bdrv_replace_in_backing_chain(BlockDriverState *old,
@@ -214,8 +212,8 @@ BdrvChild *bdrv_open_child(const char *filename,
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                            const char *bdref_key, Error **errp);
-int bdrv_open(BlockDriverState **pbs, const char *filename,
-              const char *reference, QDict *options, int flags, Error **errp);
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+                            QDict *options, int flags, Error **errp);
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                     BlockDriverState *bs,
                                     QDict *options, int flags);
@@ -244,10 +242,6 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
     const void *buf, int count);
 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov);
 /*
@@ -402,7 +396,19 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
                                  Error **errp);
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
 BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BdrvNextIterator *bdrv_next(BdrvNextIterator *it, BlockDriverState **bs);
+
+typedef struct BdrvNextIterator {
+    enum {
+        BDRV_NEXT_BACKEND_ROOTS,
+        BDRV_NEXT_MONITOR_OWNED,
+    } phase;
+    BlockBackend *blk;
+    BlockDriverState *bs;
+} BdrvNextIterator;
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it);
+BlockDriverState *bdrv_next(BdrvNextIterator *it);
+
 BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
 int bdrv_is_encrypted(BlockDriverState *bs);
 int bdrv_key_required(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index b6f4755725..30a97178c8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -719,7 +719,8 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr);
 
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
-                                  const BdrvChildRole *child_role);
+                                  const BdrvChildRole *child_role,
+                                  void *opaque);
 void bdrv_root_unref_child(BdrvChild *child);
 
 const char *bdrv_get_parent_name(const BlockDriverState *bs);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 073a433cf8..86d28070b8 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -82,7 +82,7 @@ struct BlockJob {
     const BlockJobDriver *driver;
 
     /** The block device on which the job is operating.  */
-    BlockDriverState *bs;
+    BlockBackend *blk;
 
     /**
      * The ID of the block job. Currently the BlockBackend name of the BDS
@@ -135,6 +135,9 @@ struct BlockJob {
      */
     bool deferred_to_main_loop;
 
+    /** Element of the list of block jobs */
+    QLIST_ENTRY(BlockJob) job_list;
+
     /** Status that is published by the query-block-jobs QMP API */
     BlockDeviceIoStatus iostatus;
 
@@ -173,6 +176,17 @@ struct BlockJob {
 };
 
 /**
+ * block_job_next:
+ * @job: A block job, or %NULL.
+ *
+ * Get the next element from the list of block jobs after @job, or the
+ * first one if @job is %NULL.
+ *
+ * Returns the requested job, or %NULL if there are no more jobs left.
+ */
+BlockJob *block_job_next(BlockJob *job);
+
+/**
  * block_job_create:
  * @job_type: The class object for the newly-created job.
  * @bs: The block
@@ -357,6 +371,13 @@ bool block_job_is_paused(BlockJob *job);
 int block_job_cancel_sync(BlockJob *job);
 
 /**
+ * block_job_cancel_sync_all:
+ *
+ * Synchronously cancels all jobs using block_job_cancel_sync().
+ */
+void block_job_cancel_sync_all(void);
+
+/**
  * block_job_complete_sync:
  * @job: The job to be completed.
  * @errp: Error object which may be set by block_job_complete(); this is not
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 68d92b556e..c04af8ea46 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -78,8 +78,7 @@ typedef struct BlockBackendPublic {
     QLIST_ENTRY(BlockBackendPublic) round_robin;
 } BlockBackendPublic;
 
-BlockBackend *blk_new(Error **errp);
-BlockBackend *blk_new_with_bs(Error **errp);
+BlockBackend *blk_new(void);
 BlockBackend *blk_new_open(const char *filename, const char *reference,
                            QDict *options, int flags, Error **errp);
 int blk_get_refcnt(BlockBackend *blk);
@@ -114,11 +113,17 @@ void *blk_get_attached_dev(BlockBackend *blk);
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
                           int count);
-int blk_write_zeroes(BlockBackend *blk, int64_t offset,
-                     int count, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
-                                 int count, BdrvRequestFlags flags,
-                                 BlockCompletionFunc *cb, void *opaque);
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+                               unsigned int bytes, QEMUIOVector *qiov,
+                               BdrvRequestFlags flags);
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                      int count, BdrvRequestFlags flags);
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                  int count, BdrvRequestFlags flags,
+                                  BlockCompletionFunc *cb, void *opaque);
 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count);
 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
                BdrvRequestFlags flags);
@@ -196,8 +201,8 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk);
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
                   BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
-                                     int count, BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                                      int count, BdrvRequestFlags flags);
 int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors);
 int blk_truncate(BlockBackend *blk, int64_t offset);
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index d6e96a4298..34c8eaf64e 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -194,19 +194,19 @@ void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
 
-typedef BlockAIOCB *DMAIOFunc(BlockBackend *blk, int64_t offset,
-                              QEMUIOVector *iov, BdrvRequestFlags flags,
-                              BlockCompletionFunc *cb, void *opaque);
-
-BlockAIOCB *dma_blk_io(BlockBackend *blk,
-                       QEMUSGList *sg, uint64_t sector_num,
-                       DMAIOFunc *io_func, BlockCompletionFunc *cb,
-                       void *opaque, DMADirection dir);
+typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
+                              BlockCompletionFunc *cb, void *cb_opaque,
+                              void *opaque);
+
+BlockAIOCB *dma_blk_io(AioContext *ctx,
+                       QEMUSGList *sg, uint64_t offset,
+                       DMAIOFunc *io_func, void *io_func_opaque,
+                       BlockCompletionFunc *cb, void *opaque, DMADirection dir);
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
-                         QEMUSGList *sg, uint64_t sector,
+                         QEMUSGList *sg, uint64_t offset,
                          BlockCompletionFunc *cb, void *opaque);
 BlockAIOCB *dma_blk_write(BlockBackend *blk,
-                          QEMUSGList *sg, uint64_t sector,
+                          QEMUSGList *sg, uint64_t offset,
                           BlockCompletionFunc *cb, void *opaque);
 uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg);
 uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg);
diff --git a/migration/block.c b/migration/block.c
index a7a76a0fb9..e0628d187f 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -383,7 +383,7 @@ static void init_blk_migration(QEMUFile *f)
     BlockDriverState *bs;
     BlkMigDevState *bmds;
     int64_t sectors;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
     block_mig_state.submitted = 0;
     block_mig_state.read_done = 0;
@@ -394,7 +394,7 @@ static void init_blk_migration(QEMUFile *f)
     block_mig_state.zero_blocks = migrate_zero_blocks();
 
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         if (bdrv_is_read_only(bs)) {
             continue;
         }
diff --git a/monitor.c b/monitor.c
index 6a32b9bf59..404d594bb3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3432,12 +3432,12 @@ static void vm_completion(ReadLineState *rs, const char *str)
 {
     size_t len;
     BlockDriverState *bs;
-    BdrvNextIterator *it = NULL;
+    BdrvNextIterator it;
 
     len = strlen(str);
     readline_set_completion_index(rs, len);
 
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         SnapshotInfoList *snapshots, *snapshot;
         AioContext *ctx = bdrv_get_aio_context(bs);
         bool ok = false;
diff --git a/qemu-img.c b/qemu-img.c
index 7ed8ef21cb..4b56ad36aa 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -775,7 +775,7 @@ static void common_block_job_cb(void *opaque, int ret)
 
 static void run_block_job(BlockJob *job, Error **errp)
 {
-    AioContext *aio_context = bdrv_get_aio_context(job->bs);
+    AioContext *aio_context = blk_get_aio_context(job->blk);
 
     do {
         aio_poll(aio_context, true);
@@ -1606,8 +1606,8 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
             if (s->has_zero_init) {
                 break;
             }
-            ret = blk_write_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
-                                   n << BDRV_SECTOR_BITS, 0);
+            ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
+                                    n << BDRV_SECTOR_BITS, 0);
             if (ret < 0) {
                 return ret;
             }
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index e766791ffc..09e879f872 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -451,12 +451,12 @@ typedef struct {
     bool done;
 } CoWriteZeroes;
 
-static void coroutine_fn co_write_zeroes_entry(void *opaque)
+static void coroutine_fn co_pwrite_zeroes_entry(void *opaque)
 {
     CoWriteZeroes *data = opaque;
 
-    data->ret = blk_co_write_zeroes(data->blk, data->offset, data->count,
-                                    data->flags);
+    data->ret = blk_co_pwrite_zeroes(data->blk, data->offset, data->count,
+                                     data->flags);
     data->done = true;
     if (data->ret < 0) {
         *data->total = data->ret;
@@ -466,8 +466,8 @@ static void coroutine_fn co_write_zeroes_entry(void *opaque)
     *data->total = data->count;
 }
 
-static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count,
-                              int flags, int64_t *total)
+static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+                               int64_t count, int flags, int64_t *total)
 {
     Coroutine *co;
     CoWriteZeroes data = {
@@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockBackend *blk, int64_t offset, int64_t count,
         return -ERANGE;
     }
 
-    co = qemu_coroutine_create(co_write_zeroes_entry);
+    co = qemu_coroutine_create(co_pwrite_zeroes_entry);
     qemu_coroutine_enter(co, &data);
     while (!data.done) {
         aio_poll(blk_get_aio_context(blk), true);
@@ -901,7 +901,7 @@ static void write_help(void)
 " -C, -- report statistics in a machine parsable format\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
 " -u, -- with -z, allow unmapping\n"
-" -z, -- write zeroes using blk_co_write_zeroes\n"
+" -z, -- write zeroes using blk_co_pwrite_zeroes\n"
 "\n");
 }
 
@@ -1033,7 +1033,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv)
     if (bflag) {
         cnt = do_save_vmstate(blk, buf, offset, count, &total);
     } else if (zflag) {
-        cnt = do_co_write_zeroes(blk, offset, count, flags, &total);
+        cnt = do_co_pwrite_zeroes(blk, offset, count, flags, &total);
     } else if (cflag) {
         cnt = do_write_compressed(blk, buf, offset, count, &total);
     } else {
@@ -1376,7 +1376,7 @@ static void aio_write_help(void)
 " -i, -- treat request as invalid, for exercising stats\n"
 " -q, -- quiet mode, do not show I/O statistics\n"
 " -u, -- with -z, allow unmapping\n"
-" -z, -- write zeroes using blk_aio_write_zeroes\n"
+" -z, -- write zeroes using blk_aio_pwrite_zeroes\n"
 "\n");
 }
 
@@ -1475,8 +1475,8 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv)
         }
 
         ctx->qiov.size = count;
-        blk_aio_write_zeroes(blk, ctx->offset, count, flags, aio_write_done,
-                             ctx);
+        blk_aio_pwrite_zeroes(blk, ctx->offset, count, flags, aio_write_done,
+                              ctx);
     } else {
         nr_iov = argc - optind;
         ctx->buf = create_iovec(blk, &ctx->qiov, &argv[optind], nr_iov,
diff --git a/qmp.c b/qmp.c
index 8f8ae3a79d..3165f8726b 100644
--- a/qmp.c
+++ b/qmp.c
@@ -181,7 +181,7 @@ void qmp_cont(Error **errp)
     Error *local_err = NULL;
     BlockBackend *blk;
     BlockDriverState *bs;
-    BdrvNextIterator *it;
+    BdrvNextIterator it;
 
     /* if there is a dump in background, we should wait until the dump
      * finished */
@@ -201,8 +201,7 @@ void qmp_cont(Error **errp)
         blk_iostatus_reset(blk);
     }
 
-    it = NULL;
-    while ((it = bdrv_next(it, &bs))) {
+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         bdrv_add_key(bs, NULL, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index b1c542f99b..ed1d9d464c 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -207,33 +207,6 @@ class TestSingleBlockdev(TestSingleDrive):
     test_image_not_found = None
     test_small_buffer2 = None
 
-class TestBlockdevAttached(iotests.QMPTestCase):
-    image_len = 1 * 1024 * 1024 # MB
-
-    def setUp(self):
-        iotests.create_image(backing_img, self.image_len)
-        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
-        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img)
-        self.vm = iotests.VM().add_drive(test_img)
-        self.vm.launch()
-
-    def tearDown(self):
-        self.vm.shutdown()
-        os.remove(test_img)
-        os.remove(target_img)
-
-    def test_blockdev_attached(self):
-        self.assert_no_active_block_jobs()
-        args = {'options':
-                    {'driver': iotests.imgfmt,
-                     'id': 'drive1',
-                     'file': { 'filename': target_img, 'driver': 'file' } } }
-        result = self.vm.qmp("blockdev-add", **args)
-        self.assert_qmp(result, 'return', {})
-        result = self.vm.qmp('blockdev-mirror', device='drive0', sync='full',
-                             target='drive1')
-        self.assert_qmp(result, 'error/class', 'GenericError')
-
 class TestSingleDriveZeroLength(TestSingleDrive):
     image_len = 0
     test_small_buffer2 = None
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index b67d0504a6..b0cadc8245 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-............................................................................
+...........................................................................
 ----------------------------------------------------------------------
-Ran 76 tests
+Ran 75 tests
 
 OK
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index 55fad9507a..828389bb45 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -15,6 +15,7 @@
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
 #include "block/blockjob.h"
+#include "sysemu/block-backend.h"
 
 typedef struct {
     BlockJob common;
@@ -30,7 +31,7 @@ static const BlockJobDriver test_block_job_driver = {
 
 static void test_block_job_complete(BlockJob *job, void *opaque)
 {
-    BlockDriverState *bs = job->bs;
+    BlockDriverState *bs = blk_bs(job->blk);
     int rc = (intptr_t)opaque;
 
     if (block_job_is_cancelled(job)) {
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index 5ec966c8a4..c02be805f7 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -578,9 +578,9 @@ static void test_groups(void)
     BlockBackend *blk1, *blk2, *blk3;
     BlockBackendPublic *blkp1, *blkp2, *blkp3;
 
-    blk1 = blk_new_with_bs(&error_abort);
-    blk2 = blk_new_with_bs(&error_abort);
-    blk3 = blk_new_with_bs(&error_abort);
+    blk1 = blk_new();
+    blk2 = blk_new();
+    blk3 = blk_new();
 
     blkp1 = blk_get_public(blk1);
     blkp2 = blk_get_public(blk2);
diff --git a/trace-events b/trace-events
index b53c3541a3..4450d8f377 100644
--- a/trace-events
+++ b/trace-events
@@ -61,6 +61,10 @@ virtio_console_chr_event(unsigned int port, int event) "port %u, event %d"
 bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
 bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 
+# block/block-backend.c
+blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+
 # block/io.c
 bdrv_aio_discard(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
@@ -68,8 +72,6 @@ bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %
 bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int nb_sectors, int flags, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x opaque %p"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
-bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
-bdrv_co_readv_no_serialising(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector, int flags) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x"
 bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
@@ -1143,7 +1145,7 @@ win_helper_done(uint32_t tl) "tl=%d"
 win_helper_retry(uint32_t tl) "tl=%d"
 
 # dma-helpers.c
-dma_blk_io(void *dbs, void *bs, int64_t sector_num, bool to_dev) "dbs=%p bs=%p sector_num=%" PRId64 " to_dev=%d"
+dma_blk_io(void *dbs, void *bs, int64_t offset, bool to_dev) "dbs=%p bs=%p offset=%" PRId64 " to_dev=%d"
 dma_aio_cancel(void *dbs) "dbs=%p"
 dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p"
 dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d"