summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-10-17 22:14:52 +0100
committerPeter Maydell <peter.maydell@linaro.org>2015-10-17 22:14:52 +0100
commitc737c7a608f4cd2c06e6600303845c4b469822c5 (patch)
tree062bb9ec0082a16928572cb7ff0a039d79de93bb
parent6d57410a79d51d92673c54f26624b44f27fa6214 (diff)
parent6b826af7b010ed1963b1e7bfb5c389dcdbaff222 (diff)
downloadfocaccia-qemu-c737c7a608f4cd2c06e6600303845c4b469822c5.tar.gz
focaccia-qemu-c737c7a608f4cd2c06e6600303845c4b469822c5.zip
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches

# gpg: Signature made Fri 16 Oct 2015 14:36:50 BST using RSA key ID C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"

* remotes/kevin/tags/for-upstream: (29 commits)
  blkdebug: Don't confuse image as backing file
  qcow2: Remove forward declaration of QCowAIOCB
  qemu-nbd: always compile in --aio=MODE option
  blockdev: always compile in -drive aio= parsing
  raw-posix: warn about BDRV_O_NATIVE_AIO if libaio is unavailable
  block: auto-generated node-names
  util - add automated ID generation utility
  blkverify: Fix BDS leak in .bdrv_open error path
  block: Allow bdrv_unref_child(bs, NULL)
  block: Remove bdrv_swap()
  block: Add and use bdrv_replace_in_backing_chain()
  blockjob: Store device name at job creation
  block: Implement bdrv_append() without bdrv_swap()
  block: Introduce parents list
  block-backend: Add blk_set_bs()
  block/io: Make bdrv_requests_pending() public
  block: Split bdrv_move_feature_fields()
  block: Manage backing file references in bdrv_set_backing_hd()
  block: Convert bs->backing_hd to BdrvChild
  block: Remove bdrv_open_image()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--block.c512
-rw-r--r--block/blkdebug.c34
-rw-r--r--block/blkverify.c71
-rw-r--r--block/block-backend.c17
-rw-r--r--block/bochs.c8
-rw-r--r--block/cloop.c10
-rw-r--r--block/dmg.c20
-rw-r--r--block/io.c76
-rw-r--r--block/mirror.c22
-rw-r--r--block/parallels.c38
-rw-r--r--block/qapi.c10
-rw-r--r--block/qcow.c47
-rw-r--r--block/qcow2-cache.c11
-rw-r--r--block/qcow2-cluster.c41
-rw-r--r--block/qcow2-refcount.c45
-rw-r--r--block/qcow2-snapshot.c30
-rw-r--r--block/qcow2.c68
-rw-r--r--block/qcow2.h2
-rw-r--r--block/qed-table.c4
-rw-r--r--block/qed.c51
-rw-r--r--block/quorum.c65
-rw-r--r--block/raw-posix.c11
-rw-r--r--block/raw_bsd.c40
-rw-r--r--block/snapshot.c12
-rw-r--r--block/stream.c34
-rw-r--r--block/vdi.c17
-rw-r--r--block/vhdx-log.c25
-rw-r--r--block/vhdx.c36
-rw-r--r--block/vmdk.c133
-rw-r--r--block/vpc.c34
-rw-r--r--block/vvfat.c19
-rw-r--r--blockdev.c8
-rw-r--r--blockjob.c15
-rw-r--r--include/block/block.h15
-rw-r--r--include/block/block_int.h20
-rw-r--r--include/block/blockjob.h8
-rw-r--r--include/qemu-common.h8
-rw-r--r--include/qemu/queue.h6
-rw-r--r--qemu-img.c20
-rw-r--r--qemu-nbd.c8
-rw-r--r--qmp-commands.hx29
-rwxr-xr-xtests/qemu-iotests/0414
-rwxr-xr-xtests/qemu-iotests/0513
-rw-r--r--tests/qemu-iotests/051.out2
-rw-r--r--tests/qemu-iotests/059.out12
-rwxr-xr-xtests/qemu-iotests/0618
-rw-r--r--tests/qemu-iotests/061.out4
-rwxr-xr-xtests/qemu-iotests/0673
-rw-r--r--tests/qemu-iotests/067.out5
-rwxr-xr-xtests/qemu-iotests/0813
-rw-r--r--tests/qemu-iotests/081.out2
-rw-r--r--tests/qemu-iotests/09669
-rw-r--r--tests/qemu-iotests/096.out5
-rw-r--r--tests/qemu-iotests/common.filter5
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--util/id.c37
56 files changed, 960 insertions, 883 deletions
diff --git a/block.c b/block.c
index 1f90b4773f..09f2a754f1 100644
--- a/block.c
+++ b/block.c
@@ -721,7 +721,7 @@ const BdrvChildRole child_format = {
 };
 
 /*
- * Returns the flags that bs->backing_hd should get, based on the given flags
+ * Returns the flags that bs->backing should get, based on the given flags
  * for the parent BDS
  */
 static int bdrv_backing_flags(int flags)
@@ -763,12 +763,15 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
                                   const char *node_name,
                                   Error **errp)
 {
-    if (!node_name) {
-        return;
-    }
+    char *gen_node_name = NULL;
 
-    /* Check for empty string or invalid characters */
-    if (!id_wellformed(node_name)) {
+    if (!node_name) {
+        node_name = gen_node_name = id_generate(ID_BLOCK);
+    } else if (!id_wellformed(node_name)) {
+        /*
+         * Check for empty string or invalid characters, but not if it is
+         * generated (generated names use characters not available to the user)
+         */
         error_setg(errp, "Invalid node name");
         return;
     }
@@ -777,18 +780,20 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
     if (blk_by_name(node_name)) {
         error_setg(errp, "node-name=%s is conflicting with a device id",
                    node_name);
-        return;
+        goto out;
     }
 
     /* takes care of avoiding duplicates node names */
     if (bdrv_find_node(node_name)) {
         error_setg(errp, "Duplicate node name");
-        return;
+        goto out;
     }
 
     /* copy node name into the bs and insert it into the graph list */
     pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
     QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
+out:
+    g_free(gen_node_name);
 }
 
 static QemuOptsList bdrv_runtime_opts = {
@@ -809,7 +814,7 @@ static QemuOptsList bdrv_runtime_opts = {
  *
  * Removes all processed options from *options.
  */
-static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
     QDict *options, int flags, BlockDriver *drv, Error **errp)
 {
     int ret, open_flags;
@@ -823,7 +828,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
     assert(options != NULL && bs->options != options);
 
     if (file != NULL) {
-        filename = file->filename;
+        filename = file->bs->filename;
     } else {
         filename = qdict_get_try_str(options, "filename");
     }
@@ -1090,6 +1095,7 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
     };
 
     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+    QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
 
     return child;
 }
@@ -1097,50 +1103,62 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
 static void bdrv_detach_child(BdrvChild *child)
 {
     QLIST_REMOVE(child, next);
+    QLIST_REMOVE(child, next_parent);
     g_free(child);
 }
 
 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
 {
-    BlockDriverState *child_bs = child->bs;
+    BlockDriverState *child_bs;
+
+    if (child == NULL) {
+        return;
+    }
 
     if (child->bs->inherits_from == parent) {
         child->bs->inherits_from = NULL;
     }
 
+    child_bs = child->bs;
     bdrv_detach_child(child);
     bdrv_unref(child_bs);
 }
 
+/*
+ * Sets the backing file link of a BDS. A new reference is created; callers
+ * which don't need their own reference any more must call bdrv_unref().
+ */
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
 {
+    if (backing_hd) {
+        bdrv_ref(backing_hd);
+    }
 
-    if (bs->backing_hd) {
+    if (bs->backing) {
         assert(bs->backing_blocker);
-        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
-        bdrv_detach_child(bs->backing_child);
+        bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
+        bdrv_unref_child(bs, bs->backing);
     } else if (backing_hd) {
         error_setg(&bs->backing_blocker,
                    "node is used as backing hd of '%s'",
                    bdrv_get_device_or_node_name(bs));
     }
 
-    bs->backing_hd = backing_hd;
     if (!backing_hd) {
         error_free(bs->backing_blocker);
         bs->backing_blocker = NULL;
-        bs->backing_child = NULL;
+        bs->backing = NULL;
         goto out;
     }
-    bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
+    bs->backing = bdrv_attach_child(bs, backing_hd, &child_backing);
     bs->open_flags &= ~BDRV_O_NO_BACKING;
     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
     pstrcpy(bs->backing_format, sizeof(bs->backing_format),
             backing_hd->drv ? backing_hd->drv->format_name : "");
 
-    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
+    bdrv_op_block_all(backing_hd, bs->backing_blocker);
     /* Otherwise we won't be able to commit due to check in bdrv_commit */
-    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
+    bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                     bs->backing_blocker);
 out:
     bdrv_refresh_limits(bs, NULL);
@@ -1161,7 +1179,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
     BlockDriverState *backing_hd;
     Error *local_err = NULL;
 
-    if (bs->backing_hd != NULL) {
+    if (bs->backing != NULL) {
         QDECREF(options);
         goto free_exit;
     }
@@ -1201,7 +1219,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
         qdict_put(options, "driver", qstring_from_str(bs->backing_format));
     }
 
-    assert(bs->backing_hd == NULL);
+    assert(bs->backing == NULL);
     ret = bdrv_open_inherit(&backing_hd,
                             *backing_filename ? backing_filename : NULL,
                             NULL, options, 0, bs, &child_backing, &local_err);
@@ -1215,7 +1233,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
         goto free_exit;
     }
 
+    /* Hook up the backing file link; drop our reference, bs owns the
+     * backing_hd reference now */
     bdrv_set_backing_hd(bs, backing_hd);
+    bdrv_unref(backing_hd);
 
 free_exit:
     g_free(backing_filename);
@@ -1279,40 +1300,6 @@ done:
     return c;
 }
 
-/*
- * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
- * a BdrvChild object.
- *
- * If allow_none is true, no image will be opened if filename is false and no
- * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
- *
- * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
- */
-int bdrv_open_image(BlockDriverState **pbs, const char *filename,
-                    QDict *options, const char *bdref_key,
-                    BlockDriverState* parent, const BdrvChildRole *child_role,
-                    bool allow_none, Error **errp)
-{
-    Error *local_err = NULL;
-    BdrvChild *c;
-
-    assert(pbs);
-    assert(*pbs == NULL);
-
-    c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
-                        allow_none, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return -EINVAL;
-    }
-
-    if (c != NULL) {
-        *pbs = c->bs;
-    }
-
-    return 0;
-}
-
 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
 {
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
@@ -1401,7 +1388,8 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                              const BdrvChildRole *child_role, Error **errp)
 {
     int ret;
-    BlockDriverState *file = NULL, *bs;
+    BdrvChild *file = NULL;
+    BlockDriverState *bs;
     BlockDriver *drv = NULL;
     const char *drvname;
     Error *local_err = NULL;
@@ -1485,11 +1473,12 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
             flags = bdrv_backing_flags(flags);
         }
 
-        assert(file == NULL);
         bs->open_flags = flags;
-        ret = bdrv_open_image(&file, filename, options, "file",
-                              bs, &child_file, true, &local_err);
-        if (ret < 0) {
+
+        file = bdrv_open_child(filename, options, "file", bs,
+                               &child_file, true, &local_err);
+        if (local_err) {
+            ret = -EINVAL;
             goto fail;
         }
     }
@@ -1497,7 +1486,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     /* Image format probing */
     bs->probed = !drv;
     if (!drv && file) {
-        ret = find_image_format(file, filename, &drv, &local_err);
+        ret = find_image_format(file->bs, filename, &drv, &local_err);
         if (ret < 0) {
             goto fail;
         }
@@ -1520,7 +1509,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     }
 
     if (file && (bs->file != file)) {
-        bdrv_unref(file);
+        bdrv_unref_child(bs, file);
         file = NULL;
     }
 
@@ -1537,15 +1526,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
 
     bdrv_refresh_filename(bs);
 
-    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
-     * temporary snapshot afterwards. */
-    if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
-        if (local_err) {
-            goto close_and_fail;
-        }
-    }
-
     /* Check if any unknown options were used */
     if (options && (qdict_size(options) != 0)) {
         const QDictEntry *entry = qdict_first(options);
@@ -1577,11 +1557,21 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
 
     QDECREF(options);
     *pbs = bs;
+
+    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
+     * temporary snapshot afterwards. */
+    if (snapshot_flags) {
+        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+        if (local_err) {
+            goto close_and_fail;
+        }
+    }
+
     return 0;
 
 fail:
     if (file != NULL) {
-        bdrv_unref(file);
+        bdrv_unref_child(bs, file);
     }
     QDECREF(bs->options);
     QDECREF(options);
@@ -1922,11 +1912,13 @@ void bdrv_close(BlockDriverState *bs)
         BdrvChild *child, *next;
 
         bs->drv->bdrv_close(bs);
+        bs->drv = NULL;
 
-        if (bs->backing_hd) {
-            BlockDriverState *backing_hd = bs->backing_hd;
-            bdrv_set_backing_hd(bs, NULL);
-            bdrv_unref(backing_hd);
+        bdrv_set_backing_hd(bs, NULL);
+
+        if (bs->file != NULL) {
+            bdrv_unref_child(bs, bs->file);
+            bs->file = NULL;
         }
 
         QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
@@ -1940,7 +1932,6 @@ void bdrv_close(BlockDriverState *bs)
 
         g_free(bs->opaque);
         bs->opaque = NULL;
-        bs->drv = NULL;
         bs->copy_on_read = 0;
         bs->backing_file[0] = '\0';
         bs->backing_format[0] = '\0';
@@ -1953,11 +1944,6 @@ void bdrv_close(BlockDriverState *bs)
         bs->options = NULL;
         QDECREF(bs->full_open_options);
         bs->full_open_options = NULL;
-
-        if (bs->file != NULL) {
-            bdrv_unref(bs->file);
-            bs->file = NULL;
-        }
     }
 
     if (bs->blk) {
@@ -2005,13 +1991,7 @@ void bdrv_make_anon(BlockDriverState *bs)
     bs->node_name[0] = '\0';
 }
 
-static void bdrv_rebind(BlockDriverState *bs)
-{
-    if (bs->drv && bs->drv->bdrv_rebind) {
-        bs->drv->bdrv_rebind(bs);
-    }
-}
-
+/* Fields that need to stay with the top-level BDS */
 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                      BlockDriverState *bs_src)
 {
@@ -2023,20 +2003,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
 
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
-    /* i/o throttled req */
-    bs_dest->throttle_state     = bs_src->throttle_state,
-    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;
-    bs_dest->pending_reqs[0]    = bs_src->pending_reqs[0];
-    bs_dest->pending_reqs[1]    = bs_src->pending_reqs[1];
-    bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
-    bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
-    memcpy(&bs_dest->round_robin,
-           &bs_src->round_robin,
-           sizeof(bs_dest->round_robin));
-    memcpy(&bs_dest->throttle_timers,
-           &bs_src->throttle_timers,
-           sizeof(ThrottleTimers));
-
     /* r/w error */
     bs_dest->on_read_error      = bs_src->on_read_error;
     bs_dest->on_write_error     = bs_src->on_write_error;
@@ -2047,125 +2013,45 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
 
     /* dirty bitmap */
     bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
-
-    /* reference count */
-    bs_dest->refcnt             = bs_src->refcnt;
-
-    /* job */
-    bs_dest->job                = bs_src->job;
-
-    /* keep the same entry in bdrv_states */
-    bs_dest->device_list = bs_src->device_list;
-    bs_dest->blk = bs_src->blk;
-
-    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
-           sizeof(bs_dest->op_blockers));
 }
 
-/*
- * Swap bs contents for two image chains while they are live,
- * while keeping required fields on the BlockDriverState that is
- * actually attached to a device.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_old. Both bs_new and bs_old are modified.
- *
- * bs_new must not be attached to a BlockBackend.
- *
- * This function does not create any image files.
- */
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
+static void change_parent_backing_link(BlockDriverState *from,
+                                       BlockDriverState *to)
 {
-    BlockDriverState tmp;
-    BdrvChild *child;
-
-    bdrv_drain(bs_new);
-    bdrv_drain(bs_old);
+    BdrvChild *c, *next;
 
-    /* The code needs to swap the node_name but simply swapping node_list won't
-     * work so first remove the nodes from the graph list, do the swap then
-     * insert them back if needed.
-     */
-    if (bs_new->node_name[0] != '\0') {
-        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
+    QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+        assert(c->role != &child_backing);
+        c->bs = to;
+        QLIST_REMOVE(c, next_parent);
+        QLIST_INSERT_HEAD(&to->parents, c, next_parent);
+        bdrv_ref(to);
+        bdrv_unref(from);
     }
-    if (bs_old->node_name[0] != '\0') {
-        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
-    }
-
-    /* If the BlockDriverState is part of a throttling group acquire
-     * its lock since we're going to mess with the protected fields.
-     * Otherwise there's no need to worry since no one else can touch
-     * them. */
-    if (bs_old->throttle_state) {
-        throttle_group_lock(bs_old);
+    if (from->blk) {
+        blk_set_bs(from->blk, to);
+        if (!to->device_list.tqe_prev) {
+            QTAILQ_INSERT_BEFORE(from, to, device_list);
+        }
+        QTAILQ_REMOVE(&bdrv_states, from, device_list);
     }
+}
 
-    /* bs_new must be unattached and shouldn't have anything fancy enabled */
-    assert(!bs_new->blk);
-    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
-    assert(bs_new->job == NULL);
-    assert(bs_new->io_limits_enabled == false);
-    assert(bs_new->throttle_state == NULL);
-    assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
-
-    tmp = *bs_new;
-    *bs_new = *bs_old;
-    *bs_old = tmp;
+static void swap_feature_fields(BlockDriverState *bs_top,
+                                BlockDriverState *bs_new)
+{
+    BlockDriverState tmp;
 
-    /* there are some fields that should not be swapped, move them back */
-    bdrv_move_feature_fields(&tmp, bs_old);
-    bdrv_move_feature_fields(bs_old, bs_new);
+    bdrv_move_feature_fields(&tmp, bs_top);
+    bdrv_move_feature_fields(bs_top, bs_new);
     bdrv_move_feature_fields(bs_new, &tmp);
 
-    /* bs_new must remain unattached */
-    assert(!bs_new->blk);
-
-    /* Check a few fields that should remain attached to the device */
-    assert(bs_new->job == NULL);
-    assert(bs_new->io_limits_enabled == false);
-    assert(bs_new->throttle_state == NULL);
-    assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
-
-    /* Release the ThrottleGroup lock */
-    if (bs_old->throttle_state) {
-        throttle_group_unlock(bs_old);
-    }
-
-    /* insert the nodes back into the graph node list if needed */
-    if (bs_new->node_name[0] != '\0') {
-        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
-    }
-    if (bs_old->node_name[0] != '\0') {
-        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
+    assert(!bs_new->throttle_state);
+    if (bs_top->throttle_state) {
+        assert(bs_top->io_limits_enabled);
+        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
+        bdrv_io_limits_disable(bs_top);
     }
-
-    /*
-     * Update lh_first.le_prev for non-empty lists.
-     *
-     * The head of the op blocker list doesn't change because it is moved back
-     * in bdrv_move_feature_fields().
-     */
-    assert(QLIST_EMPTY(&bs_old->tracked_requests));
-    assert(QLIST_EMPTY(&bs_new->tracked_requests));
-
-    QLIST_FIX_HEAD_PTR(&bs_new->children, next);
-    QLIST_FIX_HEAD_PTR(&bs_old->children, next);
-
-    /* Update references in bs->opaque and children */
-    QLIST_FOREACH(child, &bs_old->children, next) {
-        if (child->bs->inherits_from == bs_new) {
-            child->bs->inherits_from = bs_old;
-        }
-    }
-    QLIST_FOREACH(child, &bs_new->children, next) {
-        if (child->bs->inherits_from == bs_old) {
-            child->bs->inherits_from = bs_new;
-        }
-    }
-
-    bdrv_rebind(bs_new);
-    bdrv_rebind(bs_old);
 }
 
 /*
@@ -2178,14 +2064,59 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
  * bs_new must not be attached to a BlockBackend.
  *
  * This function does not create any image files.
+ *
+ * bdrv_append() takes ownership of a bs_new reference and unrefs it because
+ * that's what the callers commonly need. bs_new will be referenced by the old
+ * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
+ * reference of its own, it must call bdrv_ref().
  */
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
 {
-    bdrv_swap(bs_new, bs_top);
+    assert(!bdrv_requests_pending(bs_top));
+    assert(!bdrv_requests_pending(bs_new));
+
+    bdrv_ref(bs_top);
+    change_parent_backing_link(bs_top, bs_new);
+
+    /* Some fields always stay on top of the backing file chain */
+    swap_feature_fields(bs_top, bs_new);
+
+    bdrv_set_backing_hd(bs_new, bs_top);
+    bdrv_unref(bs_top);
+
+    /* bs_new is now referenced by its new parents, we don't need the
+     * additional reference any more. */
+    bdrv_unref(bs_new);
+}
+
+void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
+{
+    assert(!bdrv_requests_pending(old));
+    assert(!bdrv_requests_pending(new));
+
+    bdrv_ref(old);
 
-    /* The contents of 'tmp' will become bs_top, as we are
-     * swapping bs_new and bs_top contents. */
-    bdrv_set_backing_hd(bs_top, bs_new);
+    if (old->blk) {
+        /* As long as these fields aren't in BlockBackend, but in the top-level
+         * BlockDriverState, it's not possible for a BDS to have two BBs.
+         *
+         * We really want to copy the fields from old to new, but we go for a
+         * swap instead so that pointers aren't duplicated and cause trouble.
+         * (Also, bdrv_swap() used to do the same.) */
+        assert(!new->blk);
+        swap_feature_fields(old, new);
+    }
+    change_parent_backing_link(old, new);
+
+    /* Change backing files if a previously independent node is added to the
+     * chain. For active commit, we replace top by its own (indirect) backing
+     * file and don't do anything here so we don't build a loop. */
+    if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
+        bdrv_set_backing_hd(new, backing_bs(old));
+        bdrv_set_backing_hd(old, NULL);
+    }
+
+    bdrv_unref(old);
 }
 
 static void bdrv_delete(BlockDriverState *bs)
@@ -2237,20 +2168,20 @@ int bdrv_commit(BlockDriverState *bs)
     if (!drv)
         return -ENOMEDIUM;
 
-    if (!bs->backing_hd) {
+    if (!bs->backing) {
         return -ENOTSUP;
     }
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
-        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
+        bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
         return -EBUSY;
     }
 
-    ro = bs->backing_hd->read_only;
-    open_flags =  bs->backing_hd->open_flags;
+    ro = bs->backing->bs->read_only;
+    open_flags =  bs->backing->bs->open_flags;
 
     if (ro) {
-        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+        if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
             return -EACCES;
         }
     }
@@ -2261,7 +2192,7 @@ int bdrv_commit(BlockDriverState *bs)
         goto ro_cleanup;
     }
 
-    backing_length = bdrv_getlength(bs->backing_hd);
+    backing_length = bdrv_getlength(bs->backing->bs);
     if (backing_length < 0) {
         ret = backing_length;
         goto ro_cleanup;
@@ -2271,7 +2202,7 @@ int bdrv_commit(BlockDriverState *bs)
      * grow the backing file image if possible.  If not possible,
      * we must return an error */
     if (length > backing_length) {
-        ret = bdrv_truncate(bs->backing_hd, length);
+        ret = bdrv_truncate(bs->backing->bs, length);
         if (ret < 0) {
             goto ro_cleanup;
         }
@@ -2280,7 +2211,7 @@ int bdrv_commit(BlockDriverState *bs)
     total_sectors = length >> BDRV_SECTOR_BITS;
 
     /* qemu_try_blockalign() for bs will choose an alignment that works for
-     * bs->backing_hd as well, so no need to compare the alignment manually. */
+     * bs->backing->bs as well, so no need to compare the alignment manually. */
     buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
     if (buf == NULL) {
         ret = -ENOMEM;
@@ -2298,7 +2229,7 @@ int bdrv_commit(BlockDriverState *bs)
                 goto ro_cleanup;
             }
 
-            ret = bdrv_write(bs->backing_hd, sector, buf, n);
+            ret = bdrv_write(bs->backing->bs, sector, buf, n);
             if (ret < 0) {
                 goto ro_cleanup;
             }
@@ -2317,8 +2248,8 @@ int bdrv_commit(BlockDriverState *bs)
      * Make sure all data we wrote to the backing device is actually
      * stable on disk.
      */
-    if (bs->backing_hd) {
-        bdrv_flush(bs->backing_hd);
+    if (bs->backing) {
+        bdrv_flush(bs->backing->bs);
     }
 
     ret = 0;
@@ -2327,7 +2258,7 @@ ro_cleanup:
 
     if (ro) {
         /* ignoring error return here */
-        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
+        bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
     }
 
     return ret;
@@ -2341,7 +2272,7 @@ int bdrv_commit_all(void)
         AioContext *aio_context = bdrv_get_aio_context(bs);
 
         aio_context_acquire(aio_context);
-        if (bs->drv && bs->backing_hd) {
+        if (bs->drv && bs->backing) {
             int ret = bdrv_commit(bs);
             if (ret < 0) {
                 aio_context_release(aio_context);
@@ -2398,8 +2329,8 @@ int bdrv_change_backing_file(BlockDriverState *bs,
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                     BlockDriverState *bs)
 {
-    while (active && bs != active->backing_hd) {
-        active = active->backing_hd;
+    while (active && bs != backing_bs(active)) {
+        active = backing_bs(active);
     }
 
     return active;
@@ -2411,12 +2342,6 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
     return bdrv_find_overlay(bs, NULL);
 }
 
-typedef struct BlkIntermediateStates {
-    BlockDriverState *bs;
-    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
 /*
  * Drops images above 'base' up to and including 'top', and sets the image
  * above 'top' to have base as its backing file.
@@ -2449,15 +2374,9 @@ typedef struct BlkIntermediateStates {
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                            BlockDriverState *base, const char *backing_file_str)
 {
-    BlockDriverState *intermediate;
-    BlockDriverState *base_bs = NULL;
     BlockDriverState *new_top_bs = NULL;
-    BlkIntermediateStates *intermediate_state, *next;
     int ret = -EIO;
 
-    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
-    QSIMPLEQ_INIT(&states_to_delete);
-
     if (!top->drv || !base->drv) {
         goto exit;
     }
@@ -2469,55 +2388,29 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
         goto exit;
     }
 
-    /* special case of new_top_bs->backing_hd already pointing to base - nothing
+    /* special case of new_top_bs->backing->bs already pointing to base - nothing
      * to do, no intermediate images */
-    if (new_top_bs->backing_hd == base) {
+    if (backing_bs(new_top_bs) == base) {
         ret = 0;
         goto exit;
     }
 
-    intermediate = top;
-
-    /* now we will go down through the list, and add each BDS we find
-     * into our deletion queue, until we hit the 'base'
-     */
-    while (intermediate) {
-        intermediate_state = g_new0(BlkIntermediateStates, 1);
-        intermediate_state->bs = intermediate;
-        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
-
-        if (intermediate->backing_hd == base) {
-            base_bs = intermediate->backing_hd;
-            break;
-        }
-        intermediate = intermediate->backing_hd;
-    }
-    if (base_bs == NULL) {
-        /* something went wrong, we did not end at the base. safely
-         * unravel everything, and exit with error */
+    /* Make sure that base is in the backing chain of top */
+    if (!bdrv_chain_contains(top, base)) {
         goto exit;
     }
 
     /* success - we can delete the intermediate states, and link top->base */
-    backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
+    backing_file_str = backing_file_str ? backing_file_str : base->filename;
     ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
-                                   base_bs->drv ? base_bs->drv->format_name : "");
+                                   base->drv ? base->drv->format_name : "");
     if (ret) {
         goto exit;
     }
-    bdrv_set_backing_hd(new_top_bs, base_bs);
+    bdrv_set_backing_hd(new_top_bs, base);
 
-    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
-        /* so that bdrv_close() does not recursively close the chain */
-        bdrv_set_backing_hd(intermediate_state->bs, NULL);
-        bdrv_unref(intermediate_state->bs);
-    }
     ret = 0;
-
 exit:
-    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
-        g_free(intermediate_state);
-    }
     return ret;
 }
 
@@ -2560,7 +2453,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
         return drv->bdrv_get_allocated_file_size(bs);
     }
     if (bs->file) {
-        return bdrv_get_allocated_file_size(bs->file);
+        return bdrv_get_allocated_file_size(bs->file->bs);
     }
     return -ENOTSUP;
 }
@@ -2709,25 +2602,27 @@ void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
 
 int bdrv_is_encrypted(BlockDriverState *bs)
 {
-    if (bs->backing_hd && bs->backing_hd->encrypted)
+    if (bs->backing && bs->backing->bs->encrypted) {
         return 1;
+    }
     return bs->encrypted;
 }
 
 int bdrv_key_required(BlockDriverState *bs)
 {
-    BlockDriverState *backing_hd = bs->backing_hd;
+    BdrvChild *backing = bs->backing;
 
-    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
+    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
         return 1;
+    }
     return (bs->encrypted && !bs->valid_key);
 }
 
 int bdrv_set_key(BlockDriverState *bs, const char *key)
 {
     int ret;
-    if (bs->backing_hd && bs->backing_hd->encrypted) {
-        ret = bdrv_set_key(bs->backing_hd, key);
+    if (bs->backing && bs->backing->bs->encrypted) {
+        ret = bdrv_set_key(bs->backing->bs, key);
         if (ret < 0)
             return ret;
         if (!bs->encrypted)
@@ -2894,7 +2789,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
 {
     while (top && top != base) {
-        top = top->backing_hd;
+        top = backing_bs(top);
     }
 
     return top != NULL;
@@ -2952,7 +2847,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)
 
     /* If BS is a copy on write image, it is initialized to
        the contents of the base image, which may not be zeroes.  */
-    if (bs->backing_hd) {
+    if (bs->backing) {
         return 0;
     }
     if (bs->drv->bdrv_has_zero_init) {
@@ -2967,7 +2862,7 @@ bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
 {
     BlockDriverInfo bdi;
 
-    if (bs->backing_hd) {
+    if (bs->backing) {
         return false;
     }
 
@@ -2982,7 +2877,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 {
     BlockDriverInfo bdi;
 
-    if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
+    if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
         return false;
     }
 
@@ -2995,7 +2890,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 {
-    if (bs->backing_hd && bs->backing_hd->encrypted)
+    if (bs->backing && bs->backing->bs->encrypted)
         return bs->backing_file;
     else if (bs->encrypted)
         return bs->filename;
@@ -3042,7 +2937,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                           const char *tag)
 {
     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
     }
 
     if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
@@ -3055,7 +2950,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 {
     while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
     }
 
     if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
@@ -3068,7 +2963,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 {
     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
     }
 
     if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
@@ -3081,7 +2976,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
 {
     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
-        bs = bs->file;
+        bs = bs->file ? bs->file->bs : NULL;
     }
 
     if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
@@ -3120,13 +3015,13 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
 
     is_protocol = path_has_protocol(backing_file);
 
-    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+    for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
 
         /* If either of the filename paths is actually a protocol, then
          * compare unmodified paths; otherwise make paths relative */
         if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
             if (strcmp(backing_file, curr_bs->backing_file) == 0) {
-                retval = curr_bs->backing_hd;
+                retval = curr_bs->backing->bs;
                 break;
             }
         } else {
@@ -3150,7 +3045,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
             }
 
             if (strcmp(backing_file_full, filename_full) == 0) {
-                retval = curr_bs->backing_hd;
+                retval = curr_bs->backing->bs;
                 break;
             }
         }
@@ -3168,11 +3063,11 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs)
         return 0;
     }
 
-    if (!bs->backing_hd) {
+    if (!bs->backing) {
         return 0;
     }
 
-    return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
+    return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
 }
 
 void bdrv_init(void)
@@ -3203,7 +3098,7 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
     if (bs->drv->bdrv_invalidate_cache) {
         bs->drv->bdrv_invalidate_cache(bs, &local_err);
     } else if (bs->file) {
-        bdrv_invalidate_cache(bs->file, &local_err);
+        bdrv_invalidate_cache(bs->file->bs, &local_err);
     }
     if (local_err) {
         error_propagate(errp, local_err);
@@ -3933,10 +3828,10 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
         bs->drv->bdrv_detach_aio_context(bs);
     }
     if (bs->file) {
-        bdrv_detach_aio_context(bs->file);
+        bdrv_detach_aio_context(bs->file->bs);
     }
-    if (bs->backing_hd) {
-        bdrv_detach_aio_context(bs->backing_hd);
+    if (bs->backing) {
+        bdrv_detach_aio_context(bs->backing->bs);
     }
 
     bs->aio_context = NULL;
@@ -3953,11 +3848,11 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
 
     bs->aio_context = new_context;
 
-    if (bs->backing_hd) {
-        bdrv_attach_aio_context(bs->backing_hd, new_context);
+    if (bs->backing) {
+        bdrv_attach_aio_context(bs->backing->bs, new_context);
     }
     if (bs->file) {
-        bdrv_attach_aio_context(bs->file, new_context);
+        bdrv_attach_aio_context(bs->file->bs, new_context);
     }
     if (bs->drv->bdrv_attach_aio_context) {
         bs->drv->bdrv_attach_aio_context(bs, new_context);
@@ -4169,7 +4064,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
     /* This BDS's file name will most probably depend on its file's name, so
      * refresh that first */
     if (bs->file) {
-        bdrv_refresh_filename(bs->file);
+        bdrv_refresh_filename(bs->file->bs);
     }
 
     if (drv->bdrv_refresh_filename) {
@@ -4197,19 +4092,20 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 
         /* If no specific options have been given for this BDS, the filename of
          * the underlying file should suffice for this one as well */
-        if (bs->file->exact_filename[0] && !has_open_options) {
-            strcpy(bs->exact_filename, bs->file->exact_filename);
+        if (bs->file->bs->exact_filename[0] && !has_open_options) {
+            strcpy(bs->exact_filename, bs->file->bs->exact_filename);
         }
         /* Reconstructing the full options QDict is simple for most format block
          * drivers, as long as the full options are known for the underlying
          * file BDS. The full options QDict of that file BDS should somehow
          * contain a representation of the filename, therefore the following
          * suffices without querying the (exact_)filename of this BDS. */
-        if (bs->file->full_open_options) {
+        if (bs->file->bs->full_open_options) {
             qdict_put_obj(opts, "driver",
                           QOBJECT(qstring_from_str(drv->format_name)));
-            QINCREF(bs->file->full_open_options);
-            qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
+            QINCREF(bs->file->bs->full_open_options);
+            qdict_put_obj(opts, "file",
+                          QOBJECT(bs->file->bs->full_open_options));
 
             bs->full_open_options = opts;
         } else {
diff --git a/block/blkdebug.c b/block/blkdebug.c
index bc247f46f5..6860a2ba2f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -426,11 +426,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     /* Set initial state */
     s->state = 1;
 
-    /* Open the backing file */
-    assert(bs->file == NULL);
-    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
-                          bs, &child_file, false, &local_err);
-    if (ret < 0) {
+    /* Open the image file */
+    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
+                               bs, &child_file, false, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
         error_propagate(errp, local_err);
         goto out;
     }
@@ -449,7 +449,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     goto out;
 
 fail_unref:
-    bdrv_unref(bs->file);
+    bdrv_unref_child(bs, bs->file);
 out:
     qemu_opts_del(opts);
     return ret;
@@ -510,7 +510,8 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
         return inject_error(bs, cb, opaque, rule);
     }
 
-    return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+    return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+                          cb, opaque);
 }
 
 static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
@@ -532,7 +533,8 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
         return inject_error(bs, cb, opaque, rule);
     }
 
-    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+                           cb, opaque);
 }
 
 static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
@@ -551,7 +553,7 @@ static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
         return inject_error(bs, cb, opaque, rule);
     }
 
-    return bdrv_aio_flush(bs->file, cb, opaque);
+    return bdrv_aio_flush(bs->file->bs, cb, opaque);
 }
 
 
@@ -716,12 +718,12 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
 
 static int64_t blkdebug_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file);
+    return bdrv_getlength(bs->file->bs);
 }
 
 static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file->bs, offset);
 }
 
 static void blkdebug_refresh_filename(BlockDriverState *bs)
@@ -741,24 +743,24 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
         }
     }
 
-    if (force_json && !bs->file->full_open_options) {
+    if (force_json && !bs->file->bs->full_open_options) {
         /* The config file cannot be recreated, so creating a plain filename
          * is impossible */
         return;
     }
 
-    if (!force_json && bs->file->exact_filename[0]) {
+    if (!force_json && bs->file->bs->exact_filename[0]) {
         snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                  "blkdebug:%s:%s",
                  qdict_get_try_str(bs->options, "config") ?: "",
-                 bs->file->exact_filename);
+                 bs->file->bs->exact_filename);
     }
 
     opts = qdict_new();
     qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
 
-    QINCREF(bs->file->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));
+    QINCREF(bs->file->bs->full_open_options);
+    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
 
     for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
         if (strcmp(qdict_entry_key(e), "x-image") &&
diff --git a/block/blkverify.c b/block/blkverify.c
index d277e63220..c5f8e8dcba 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -14,7 +14,7 @@
 #include "qapi/qmp/qstring.h"
 
 typedef struct {
-    BlockDriverState *test_file;
+    BdrvChild *test_file;
 } BDRVBlkverifyState;
 
 typedef struct BlkverifyAIOCB BlkverifyAIOCB;
@@ -123,26 +123,29 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Open the raw file */
-    assert(bs->file == NULL);
-    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
-                          "raw", bs, &child_file, false, &local_err);
-    if (ret < 0) {
+    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
+                               bs, &child_file, false, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
         error_propagate(errp, local_err);
         goto fail;
     }
 
     /* Open the test file */
-    assert(s->test_file == NULL);
-    ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
-                          "test", bs, &child_format, false, &local_err);
-    if (ret < 0) {
+    s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
+                                   "test", bs, &child_format, false,
+                                   &local_err);
+    if (local_err) {
+        ret = -EINVAL;
         error_propagate(errp, local_err);
-        s->test_file = NULL;
         goto fail;
     }
 
     ret = 0;
 fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
     qemu_opts_del(opts);
     return ret;
 }
@@ -151,7 +154,7 @@ static void blkverify_close(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bdrv_unref(s->test_file);
+    bdrv_unref_child(bs, s->test_file);
     s->test_file = NULL;
 }
 
@@ -159,7 +162,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    return bdrv_getlength(s->test_file);
+    return bdrv_getlength(s->test_file->bs);
 }
 
 static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
@@ -238,13 +241,13 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
                                             nb_sectors, cb, opaque);
 
     acb->verify = blkverify_verify_readv;
-    acb->buf = qemu_blockalign(bs->file, qiov->size);
+    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
     qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
     qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
 
-    bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+    bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
                    blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+    bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
                    blkverify_aio_cb, acb);
     return &acb->common;
 }
@@ -257,9 +260,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
     BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
                                             nb_sectors, cb, opaque);
 
-    bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
                     blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+    bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
                     blkverify_aio_cb, acb);
     return &acb->common;
 }
@@ -271,7 +274,7 @@ static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
     BDRVBlkverifyState *s = bs->opaque;
 
     /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file, cb, opaque);
+    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
 }
 
 static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -279,13 +282,13 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+    bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
 
     if (perm) {
         return true;
     }
 
-    return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
+    return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
 }
 
 /* Propagate AioContext changes to ->test_file */
@@ -293,7 +296,7 @@ static void blkverify_detach_aio_context(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bdrv_detach_aio_context(s->test_file);
+    bdrv_detach_aio_context(s->test_file->bs);
 }
 
 static void blkverify_attach_aio_context(BlockDriverState *bs,
@@ -301,32 +304,38 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bdrv_attach_aio_context(s->test_file, new_context);
+    bdrv_attach_aio_context(s->test_file->bs, new_context);
 }
 
 static void blkverify_refresh_filename(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    /* bs->file has already been refreshed */
-    bdrv_refresh_filename(s->test_file);
+    /* bs->file->bs has already been refreshed */
+    bdrv_refresh_filename(s->test_file->bs);
 
-    if (bs->file->full_open_options && s->test_file->full_open_options) {
+    if (bs->file->bs->full_open_options
+        && s->test_file->bs->full_open_options)
+    {
         QDict *opts = qdict_new();
         qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
 
-        QINCREF(bs->file->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options));
-        QINCREF(s->test_file->full_open_options);
-        qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options));
+        QINCREF(bs->file->bs->full_open_options);
+        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+        QINCREF(s->test_file->bs->full_open_options);
+        qdict_put_obj(opts, "test",
+                      QOBJECT(s->test_file->bs->full_open_options));
 
         bs->full_open_options = opts;
     }
 
-    if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) {
+    if (bs->file->bs->exact_filename[0]
+        && s->test_file->bs->exact_filename[0])
+    {
         snprintf(bs->exact_filename, sizeof(bs->exact_filename),
                  "blkverify:%s:%s",
-                 bs->file->exact_filename, s->test_file->exact_filename);
+                 bs->file->bs->exact_filename,
+                 s->test_file->bs->exact_filename);
     }
 }
 
diff --git a/block/block-backend.c b/block/block-backend.c
index c2e873292a..225655126e 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -239,6 +239,23 @@ BlockDriverState *blk_bs(BlockBackend *blk)
 }
 
 /*
+ * Changes the BlockDriverState attached to @blk
+ */
+void blk_set_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+    bdrv_ref(bs);
+
+    if (blk->bs) {
+        blk->bs->blk = NULL;
+        bdrv_unref(blk->bs);
+    }
+    assert(bs->blk == NULL);
+
+    blk->bs = bs;
+    bs->blk = blk;
+}
+
+/*
  * Return @blk's DriveInfo if any, else null.
  */
 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
diff --git a/block/bochs.c b/block/bochs.c
index 199ac2b9af..18949b9d4f 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -103,7 +103,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
 
     bs->read_only = 1; // no write support yet
 
-    ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+    ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
     if (ret < 0) {
         return ret;
     }
@@ -137,7 +137,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
+    ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
                      s->catalog_size * 4);
     if (ret < 0) {
         goto fail;
@@ -206,7 +206,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
         (s->extent_blocks + s->bitmap_blocks));
 
     /* read in bitmap for current extent */
-    ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+    ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
                      &bitmap_entry, 1);
     if (ret < 0) {
         return ret;
@@ -229,7 +229,7 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num,
         if (block_offset < 0) {
             return block_offset;
         } else if (block_offset > 0) {
-            ret = bdrv_pread(bs->file, block_offset, buf, 512);
+            ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
             if (ret < 0) {
                 return ret;
             }
diff --git a/block/cloop.c b/block/cloop.c
index f328be06f8..4190ae06d7 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,7 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
     bs->read_only = 1;
 
     /* read header */
-    ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
+    ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
     if (ret < 0) {
         return ret;
     }
@@ -92,7 +92,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
+    ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
     if (ret < 0) {
         return ret;
     }
@@ -123,7 +123,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
+    ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
     if (ret < 0) {
         goto fail;
     }
@@ -203,8 +203,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
         int ret;
         uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
 
-        ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
-                         bytes);
+        ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+                         s->compressed_block, bytes);
         if (ret != bytes) {
             return -1;
         }
diff --git a/block/dmg.c b/block/dmg.c
index 9f2528169c..546a6f5330 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -85,7 +85,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
     uint64_t buffer;
     int ret;
 
-    ret = bdrv_pread(bs->file, offset, &buffer, 8);
+    ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
     if (ret < 0) {
         return ret;
     }
@@ -99,7 +99,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
     uint32_t buffer;
     int ret;
 
-    ret = bdrv_pread(bs->file, offset, &buffer, 4);
+    ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
     if (ret < 0) {
         return ret;
     }
@@ -354,7 +354,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
         offset += 4;
 
         buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file, offset, buffer, count);
+        ret = bdrv_pread(bs->file->bs, offset, buffer, count);
         if (ret < 0) {
             goto fail;
         }
@@ -391,7 +391,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
 
     buffer = g_malloc(info_length + 1);
     buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
+    ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
     if (ret != info_length) {
         ret = -EINVAL;
         goto fail;
@@ -446,7 +446,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     ds.max_sectors_per_chunk = 1;
 
     /* locate the UDIF trailer */
-    offset = dmg_find_koly_offset(bs->file, errp);
+    offset = dmg_find_koly_offset(bs->file->bs, errp);
     if (offset < 0) {
         ret = offset;
         goto fail;
@@ -514,9 +514,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* initialize zlib engine */
-    s->compressed_chunk = qemu_try_blockalign(bs->file,
+    s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
                                               ds.max_compressed_size + 1);
-    s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+    s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
                                                 512 * ds.max_sectors_per_chunk);
     if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
         ret = -ENOMEM;
@@ -592,7 +592,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
         case 0x80000005: { /* zlib compressed */
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
+            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
                              s->compressed_chunk, s->lengths[chunk]);
             if (ret != s->lengths[chunk]) {
                 return -1;
@@ -616,7 +616,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
         case 0x80000006: /* bzip2 compressed */
             /* we need to buffer, because only the chunk as whole can be
              * inflated. */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
+            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
                              s->compressed_chunk, s->lengths[chunk]);
             if (ret != s->lengths[chunk]) {
                 return -1;
@@ -641,7 +641,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
             break;
 #endif /* CONFIG_BZIP2 */
         case 1: /* copy */
-            ret = bdrv_pread(bs->file, s->offsets[chunk],
+            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
                              s->uncompressed_chunk, s->lengths[chunk]);
             if (ret != s->lengths[chunk]) {
                 return -1;
diff --git a/block/io.c b/block/io.c
index 17293c3dd3..5311473a1d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -156,38 +156,38 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 
     /* Take some limits from the children as a default */
     if (bs->file) {
-        bdrv_refresh_limits(bs->file, &local_err);
+        bdrv_refresh_limits(bs->file->bs, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
         }
-        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
-        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
-        bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment;
-        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+        bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
+        bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
+        bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
+        bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
     } else {
         bs->bl.min_mem_alignment = 512;
         bs->bl.opt_mem_alignment = getpagesize();
     }
 
-    if (bs->backing_hd) {
-        bdrv_refresh_limits(bs->backing_hd, &local_err);
+    if (bs->backing) {
+        bdrv_refresh_limits(bs->backing->bs, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
         }
         bs->bl.opt_transfer_length =
             MAX(bs->bl.opt_transfer_length,
-                bs->backing_hd->bl.opt_transfer_length);
+                bs->backing->bs->bl.opt_transfer_length);
         bs->bl.max_transfer_length =
             MIN_NON_ZERO(bs->bl.max_transfer_length,
-                         bs->backing_hd->bl.max_transfer_length);
+                         bs->backing->bs->bl.max_transfer_length);
         bs->bl.opt_mem_alignment =
             MAX(bs->bl.opt_mem_alignment,
-                bs->backing_hd->bl.opt_mem_alignment);
+                bs->backing->bs->bl.opt_mem_alignment);
         bs->bl.min_mem_alignment =
             MAX(bs->bl.min_mem_alignment,
-                bs->backing_hd->bl.min_mem_alignment);
+                bs->backing->bs->bl.min_mem_alignment);
     }
 
     /* Then let the driver override it */
@@ -213,7 +213,7 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
 }
 
 /* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
+bool bdrv_requests_pending(BlockDriverState *bs)
 {
     if (!QLIST_EMPTY(&bs->tracked_requests)) {
         return true;
@@ -224,10 +224,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
     if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
         return true;
     }
-    if (bs->file && bdrv_requests_pending(bs->file)) {
+    if (bs->file && bdrv_requests_pending(bs->file->bs)) {
         return true;
     }
-    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
+    if (bs->backing && bdrv_requests_pending(bs->backing->bs)) {
         return true;
     }
     return false;
@@ -1137,13 +1137,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
     } else {
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV);
         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
     }
-    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
+    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
     if (ret == 0 && !bs->enable_write_cache) {
         ret = bdrv_co_flush(bs);
@@ -1192,13 +1192,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
         /* RMW the unaligned part before head. */
         mark_request_serialising(req, align);
         wait_serialising_requests(req);
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
         ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
                                   align, &local_qiov, 0);
         if (ret < 0) {
             goto fail;
         }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
 
         memset(buf + head_padding_bytes, 0, zero_bytes);
         ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
@@ -1230,13 +1230,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
         /* RMW the unaligned part after tail. */
         mark_request_serialising(req, align);
         wait_serialising_requests(req);
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
         ret = bdrv_aligned_preadv(bs, req, offset, align,
                                   align, &local_qiov, 0);
         if (ret < 0) {
             goto fail;
         }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
 
         memset(buf, 0, bytes);
         ret = bdrv_aligned_pwritev(bs, req, offset, align,
@@ -1307,13 +1307,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
         };
         qemu_iovec_init_external(&head_qiov, &head_iov, 1);
 
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
         ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                   align, &head_qiov, 0);
         if (ret < 0) {
             goto fail;
         }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
 
         qemu_iovec_init(&local_qiov, qiov->niov + 2);
         qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
@@ -1341,13 +1341,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
         };
         qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
 
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
         ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                   align, &tail_qiov, 0);
         if (ret < 0) {
             goto fail;
         }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
 
         if (!use_local_qiov) {
             qemu_iovec_init(&local_qiov, qiov->niov + 1);
@@ -1496,7 +1496,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
 
     if (ret & BDRV_BLOCK_RAW) {
         assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
                                      *pnum, pnum);
     }
 
@@ -1505,8 +1505,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
     } else {
         if (bdrv_unallocated_blocks_are_zero(bs)) {
             ret |= BDRV_BLOCK_ZERO;
-        } else if (bs->backing_hd) {
-            BlockDriverState *bs2 = bs->backing_hd;
+        } else if (bs->backing) {
+            BlockDriverState *bs2 = bs->backing->bs;
             int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
             if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
                 ret |= BDRV_BLOCK_ZERO;
@@ -1519,7 +1519,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
         (ret & BDRV_BLOCK_OFFSET_VALID)) {
         int file_pnum;
 
-        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+        ret2 = bdrv_co_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
                                         *pnum, &file_pnum);
         if (ret2 >= 0) {
             /* Ignore errors.  This is just providing extra information, it
@@ -1551,7 +1551,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
     int64_t ret = 0;
 
     assert(bs != base);
-    for (p = bs; p != base; p = p->backing_hd) {
+    for (p = bs; p != base; p = backing_bs(p)) {
         ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum);
         if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
             break;
@@ -1614,7 +1614,7 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
                               int64_t sector_num,
                               int nb_sectors, int *pnum)
 {
-    return bdrv_get_block_status_above(bs, bs->backing_hd,
+    return bdrv_get_block_status_above(bs, backing_bs(bs),
                                        sector_num, nb_sectors, pnum);
 }
 
@@ -1672,7 +1672,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
             n = pnum_inter;
         }
 
-        intermediate = intermediate->backing_hd;
+        intermediate = backing_bs(intermediate);
     }
 
     *pnum = n;
@@ -1723,7 +1723,7 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
     } else if (drv->bdrv_save_vmstate) {
         return drv->bdrv_save_vmstate(bs, qiov, pos);
     } else if (bs->file) {
-        return bdrv_writev_vmstate(bs->file, qiov, pos);
+        return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
     }
 
     return -ENOTSUP;
@@ -1738,7 +1738,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
     if (drv->bdrv_load_vmstate)
         return drv->bdrv_load_vmstate(bs, buf, pos, size);
     if (bs->file)
-        return bdrv_load_vmstate(bs->file, buf, pos, size);
+        return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
     return -ENOTSUP;
 }
 
@@ -2366,7 +2366,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
      * in the case of cache=unsafe, so there are no useless flushes.
      */
 flush_parent:
-    return bdrv_co_flush(bs->file);
+    return bs->file ? bdrv_co_flush(bs->file->bs) : 0;
 }
 
 int bdrv_flush(BlockDriverState *bs)
@@ -2594,7 +2594,7 @@ void bdrv_io_plug(BlockDriverState *bs)
     if (drv && drv->bdrv_io_plug) {
         drv->bdrv_io_plug(bs);
     } else if (bs->file) {
-        bdrv_io_plug(bs->file);
+        bdrv_io_plug(bs->file->bs);
     }
 }
 
@@ -2604,7 +2604,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
     if (drv && drv->bdrv_io_unplug) {
         drv->bdrv_io_unplug(bs);
     } else if (bs->file) {
-        bdrv_io_unplug(bs->file);
+        bdrv_io_unplug(bs->file->bs);
     }
 }
 
@@ -2614,7 +2614,7 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
     if (drv && drv->bdrv_flush_io_queue) {
         drv->bdrv_flush_io_queue(bs);
     } else if (bs->file) {
-        bdrv_flush_io_queue(bs->file);
+        bdrv_flush_io_queue(bs->file->bs);
     }
     bdrv_start_throttled_reqs(bs);
 }
diff --git a/block/mirror.c b/block/mirror.c
index 1ca4aa0da0..7e43511832 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -353,6 +353,11 @@ static void mirror_exit(BlockJob *job, void *opaque)
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
     MirrorExitData *data = opaque;
     AioContext *replace_aio_context = NULL;
+    BlockDriverState *src = s->common.bs;
+
+    /* Make sure that the source BDS doesn't go away before we called
+     * block_job_completed(). */
+    bdrv_ref(src);
 
     if (s->to_replace) {
         replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -367,14 +372,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
         if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
             bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
         }
-        bdrv_swap(s->target, to_replace);
-        if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
-            /* drop the bs loop chain formed by the swap: break the loop then
-             * trigger the unref from the top one */
-            BlockDriverState *p = s->base->backing_hd;
-            bdrv_set_backing_hd(s->base, NULL);
-            bdrv_unref(p);
-        }
+        bdrv_replace_in_backing_chain(to_replace, s->target);
     }
     if (s->to_replace) {
         bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -388,6 +386,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
     bdrv_unref(s->target);
     block_job_completed(&s->common, data->ret);
     g_free(data);
+    bdrv_unref(src);
 }
 
 static void coroutine_fn mirror_run(void *opaque)
@@ -431,7 +430,7 @@ static void coroutine_fn mirror_run(void *opaque)
      */
     bdrv_get_backing_filename(s->target, backing_filename,
                               sizeof(backing_filename));
-    if (backing_filename[0] && !s->target->backing_hd) {
+    if (backing_filename[0] && !s->target->backing) {
         ret = bdrv_get_info(s->target, &bdi);
         if (ret < 0) {
             goto immediate_exit;
@@ -637,8 +636,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
         return;
     }
     if (!s->synced) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY,
-                   bdrv_get_device_name(job->bs));
+        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
         return;
     }
 
@@ -766,7 +764,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
-    base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
+    base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
     mirror_start_job(bs, target, replaces,
                      speed, granularity, buf_size,
                      on_source_error, on_target_error, unmap, cb, opaque, errp,
diff --git a/block/parallels.c b/block/parallels.c
index 5cd6ec3349..4f79293826 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -202,13 +202,13 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
 
     to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
     space = to_allocate * s->tracks;
-    if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
+    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
         int ret;
         space += s->prealloc_size;
         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
+            ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
         } else {
-            ret = bdrv_truncate(bs->file,
+            ret = bdrv_truncate(bs->file->bs,
                                 (s->data_end + space) << BDRV_SECTOR_BITS);
         }
         if (ret < 0) {
@@ -244,7 +244,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
         if (off + to_write > s->header_size) {
             to_write = s->header_size - off;
         }
-        ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write);
+        ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+                          to_write);
         if (ret < 0) {
             qemu_co_mutex_unlock(&s->lock);
             return ret;
@@ -303,7 +304,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
         qemu_iovec_reset(&hd_qiov);
         qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
 
-        ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
+        ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
         if (ret < 0) {
             break;
         }
@@ -343,7 +344,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
             qemu_iovec_reset(&hd_qiov);
             qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
 
-            ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
+            ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
             if (ret < 0) {
                 break;
             }
@@ -369,7 +370,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
     bool flush_bat = false;
     int cluster_size = s->tracks << BDRV_SECTOR_BITS;
 
-    size = bdrv_getlength(bs->file);
+    size = bdrv_getlength(bs->file->bs);
     if (size < 0) {
         res->check_errors++;
         return size;
@@ -424,7 +425,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
     }
 
     if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
+        ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
         if (ret < 0) {
             res->check_errors++;
             return ret;
@@ -440,7 +441,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
                 size - res->image_end_offset);
         res->leaks += count;
         if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file, res->image_end_offset);
+            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
             if (ret < 0) {
                 res->check_errors++;
                 return ret;
@@ -546,12 +547,13 @@ static int parallels_probe(const uint8_t *buf, int buf_size,
 static int parallels_update_header(BlockDriverState *bs)
 {
     BDRVParallelsState *s = bs->opaque;
-    unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader));
+    unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
+                        sizeof(ParallelsHeader));
 
     if (size > s->header_size) {
         size = s->header_size;
     }
-    return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+    return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
 }
 
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -564,7 +566,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     Error *local_err = NULL;
     char *buf;
 
-    ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
+    ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
     if (ret < 0) {
         goto fail;
     }
@@ -603,8 +605,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     size = bat_entry_off(s->bat_size);
-    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file));
-    s->header = qemu_try_blockalign(bs->file, s->header_size);
+    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
+    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
     if (s->header == NULL) {
         ret = -ENOMEM;
         goto fail;
@@ -619,7 +621,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         s->header_size = size;
     }
 
-    ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
+    ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
     if (ret < 0) {
         goto fail;
     }
@@ -663,8 +665,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     if (local_err != NULL) {
         goto fail_options;
     }
-    if (!bdrv_has_zero_init(bs->file) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) {
+    if (!bdrv_has_zero_init(bs->file->bs) ||
+            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
         s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
     }
 
@@ -707,7 +709,7 @@ static void parallels_close(BlockDriverState *bs)
     }
 
     if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
     }
 
     g_free(s->bat_dirty_bmap);
diff --git a/block/qapi.c b/block/qapi.c
index 2ce509711d..355ba324f1 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -110,8 +110,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
             qapi_free_BlockDeviceInfo(info);
             return NULL;
         }
-        if (bs0->drv && bs0->backing_hd) {
-            bs0 = bs0->backing_hd;
+        if (bs0->drv && bs0->backing) {
+            bs0 = bs0->backing->bs;
             (*p_image_info)->has_backing_image = true;
             p_image_info = &((*p_image_info)->backing_image);
         } else {
@@ -359,12 +359,12 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
 
     if (bs->file) {
         s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file, query_backing);
+        s->parent = bdrv_query_stats(bs->file->bs, query_backing);
     }
 
-    if (query_backing && bs->backing_hd) {
+    if (query_backing && bs->backing) {
         s->has_backing = true;
-        s->backing = bdrv_query_stats(bs->backing_hd, query_backing);
+        s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
     }
 
     return s;
diff --git a/block/qcow.c b/block/qcow.c
index 6e35db1df8..635085e27b 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -100,7 +100,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     int ret;
     QCowHeader header;
 
-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
     if (ret < 0) {
         goto fail;
     }
@@ -193,7 +193,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+    ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
                s->l1_size * sizeof(uint64_t));
     if (ret < 0) {
         goto fail;
@@ -205,7 +205,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
 
     /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
     s->l2_cache =
-        qemu_try_blockalign(bs->file,
+        qemu_try_blockalign(bs->file->bs,
                             s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
     if (s->l2_cache == NULL) {
         error_setg(errp, "Could not allocate L2 table cache");
@@ -224,7 +224,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
             ret = -EINVAL;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, header.backing_file_offset,
+        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
                    bs->backing_file, len);
         if (ret < 0) {
             goto fail;
@@ -369,13 +369,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         if (!allocate)
             return 0;
         /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(bs->file);
+        l2_offset = bdrv_getlength(bs->file->bs);
         /* round to cluster size */
         l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
         /* update the L1 entry */
         s->l1_table[l1_index] = l2_offset;
         tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite_sync(bs->file,
+        if (bdrv_pwrite_sync(bs->file->bs,
                 s->l1_table_offset + l1_index * sizeof(tmp),
                 &tmp, sizeof(tmp)) < 0)
             return 0;
@@ -405,11 +405,12 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
     l2_table = s->l2_cache + (min_index << s->l2_bits);
     if (new_l2_table) {
         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+        if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
                 s->l2_size * sizeof(uint64_t)) < 0)
             return 0;
     } else {
-        if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+        if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
+                       s->l2_size * sizeof(uint64_t)) !=
             s->l2_size * sizeof(uint64_t))
             return 0;
     }
@@ -430,20 +431,21 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                overwritten */
             if (decompress_cluster(bs, cluster_offset) < 0)
                 return 0;
-            cluster_offset = bdrv_getlength(bs->file);
+            cluster_offset = bdrv_getlength(bs->file->bs);
             cluster_offset = (cluster_offset + s->cluster_size - 1) &
                 ~(s->cluster_size - 1);
             /* write the cluster content */
-            if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+            if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
+                            s->cluster_size) !=
                 s->cluster_size)
                 return -1;
         } else {
-            cluster_offset = bdrv_getlength(bs->file);
+            cluster_offset = bdrv_getlength(bs->file->bs);
             if (allocate == 1) {
                 /* round to cluster size */
                 cluster_offset = (cluster_offset + s->cluster_size - 1) &
                     ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
                 /* if encrypted, we must initialize the cluster
                    content which won't be written */
                 if (bs->encrypted &&
@@ -463,7 +465,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                                 errno = EIO;
                                 return -1;
                             }
-                            if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+                            if (bdrv_pwrite(bs->file->bs,
+                                            cluster_offset + i * 512,
                                             s->cluster_data, 512) != 512)
                                 return -1;
                         }
@@ -477,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         /* update L2 table */
         tmp = cpu_to_be64(cluster_offset);
         l2_table[l2_index] = tmp;
-        if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+        if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
                 &tmp, sizeof(tmp)) < 0)
             return 0;
     }
@@ -546,7 +549,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
     if (s->cluster_cache_offset != coffset) {
         csize = cluster_offset >> (63 - s->cluster_bits);
         csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
+        ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
         if (ret != csize)
             return -1;
         if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -594,13 +597,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
         }
 
         if (!cluster_offset) {
-            if (bs->backing_hd) {
+            if (bs->backing) {
                 /* read from the base image */
                 hd_iov.iov_base = (void *)buf;
                 hd_iov.iov_len = n * 512;
                 qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                 qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->backing_hd, sector_num,
+                ret = bdrv_co_readv(bs->backing->bs, sector_num,
                                     n, &hd_qiov);
                 qemu_co_mutex_lock(&s->lock);
                 if (ret < 0) {
@@ -625,7 +628,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
             hd_iov.iov_len = n * 512;
             qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file,
+            ret = bdrv_co_readv(bs->file->bs,
                                 (cluster_offset >> 9) + index_in_cluster,
                                 n, &hd_qiov);
             qemu_co_mutex_lock(&s->lock);
@@ -727,7 +730,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
         hd_iov.iov_len = n * 512;
         qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
         qemu_co_mutex_unlock(&s->lock);
-        ret = bdrv_co_writev(bs->file,
+        ret = bdrv_co_writev(bs->file->bs,
                              (cluster_offset >> 9) + index_in_cluster,
                              n, &hd_qiov);
         qemu_co_mutex_lock(&s->lock);
@@ -879,10 +882,10 @@ static int qcow_make_empty(BlockDriverState *bs)
     int ret;
 
     memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+    if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
             l1_length) < 0)
         return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
     if (ret < 0)
         return ret;
 
@@ -962,7 +965,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
         }
 
         cluster_offset &= s->cluster_offset_mask;
-        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
         if (ret < 0) {
             goto fail;
         }
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 7b14c5c5a5..86dd7f2bd9 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -127,7 +127,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
     c = g_new0(Qcow2Cache, 1);
     c->size = num_tables;
     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    c->table_array = qemu_try_blockalign(bs->file,
+    c->table_array = qemu_try_blockalign(bs->file->bs,
                                          (size_t) num_tables * s->cluster_size);
 
     if (!c->entries || !c->table_array) {
@@ -185,7 +185,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
     if (c->depends) {
         ret = qcow2_cache_flush_dependency(bs, c);
     } else if (c->depends_on_flush) {
-        ret = bdrv_flush(bs->file);
+        ret = bdrv_flush(bs->file->bs);
         if (ret >= 0) {
             c->depends_on_flush = false;
         }
@@ -216,7 +216,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
     }
 
-    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+    ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
                       qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
     if (ret < 0) {
         return ret;
@@ -244,7 +244,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
     }
 
     if (result == 0) {
-        ret = bdrv_flush(bs->file);
+        ret = bdrv_flush(bs->file->bs);
         if (ret < 0) {
             result = ret;
         }
@@ -356,7 +356,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
         }
 
-        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+        ret = bdrv_pread(bs->file->bs, offset,
+                         qcow2_cache_get_table_addr(bs, c, i),
                          s->cluster_size);
         if (ret < 0) {
             return ret;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 6ede629efb..67be0ce2c9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -72,7 +72,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 #endif
 
     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_try_blockalign(bs->file,
+    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                        align_offset(new_l1_size2, 512));
     if (new_l1_table == NULL) {
         return -ENOMEM;
@@ -105,7 +105,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+    ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+                           new_l1_table, new_l1_size2);
     if (ret < 0)
         goto fail;
     for(i = 0; i < s->l1_size; i++)
@@ -115,7 +116,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
     cpu_to_be32w((uint32_t*)data, new_l1_size);
     stq_be_p(data + 4, new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+                           data, sizeof(data));
     if (ret < 0) {
         goto fail;
     }
@@ -182,8 +184,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
-        buf, sizeof(buf));
+    ret = bdrv_pwrite_sync(bs->file->bs,
+                           s->l1_table_offset + 8 * l1_start_index,
+                           buf, sizeof(buf));
     if (ret < 0) {
         return ret;
     }
@@ -440,7 +443,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+    ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
+                         &qiov);
     if (ret < 0) {
         goto out;
     }
@@ -817,7 +821,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
 
     /*
      * If this was a COW, we need to decrease the refcount of the old cluster.
-     * Also flush bs->file to get the right order for L2 and refcount update.
      *
      * Don't discard clusters that reach a refcount of 0 (e.g. compressed
      * clusters), the next write will reuse them anyway.
@@ -1412,7 +1415,8 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
         sector_offset = coffset & 511;
         csize = nb_csectors * 512 - sector_offset;
         BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+        ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+                        nb_csectors);
         if (ret < 0) {
             return ret;
         }
@@ -1469,7 +1473,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
          */
         switch (qcow2_get_cluster_type(old_l2_entry)) {
             case QCOW2_CLUSTER_UNALLOCATED:
-                if (full_discard || !bs->backing_hd) {
+                if (full_discard || !bs->backing) {
                     continue;
                 }
                 break;
@@ -1645,7 +1649,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
     if (!is_active_l1) {
         /* inactive L2 tables require a buffer to be stored in when loading
          * them from disk */
-        l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+        l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
         if (l2_table == NULL) {
             return -ENOMEM;
         }
@@ -1679,8 +1683,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     (void **)&l2_table);
         } else {
             /* load inactive L2 tables from disk */
-            ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                    (void *)l2_table, s->cluster_sectors);
+            ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                            (void *)l2_table, s->cluster_sectors);
         }
         if (ret < 0) {
             goto fail;
@@ -1703,7 +1707,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
             }
 
             if (!preallocated) {
-                if (!bs->backing_hd) {
+                if (!bs->backing) {
                     /* not backed; therefore we can simply deallocate the
                      * cluster */
                     l2_table[j] = 0;
@@ -1754,7 +1758,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 goto fail;
             }
 
-            ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+            ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
                                     s->cluster_sectors, 0);
             if (ret < 0) {
                 if (!preallocated) {
@@ -1787,8 +1791,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     goto fail;
                 }
 
-                ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                        (void *)l2_table, s->cluster_sectors);
+                ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                                 (void *)l2_table, s->cluster_sectors);
                 if (ret < 0) {
                     goto fail;
                 }
@@ -1861,8 +1865,9 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
 
         l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
 
-        ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
-                BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+        ret = bdrv_read(bs->file->bs,
+                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
+                        (void *)l1_table, l1_sectors);
         if (ret < 0) {
             goto fail;
         }
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 2110839da4..4b81c8db61 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -101,7 +101,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
             goto fail;
         }
         BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file, s->refcount_table_offset,
+        ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
                          s->refcount_table, refcount_table_size2);
         if (ret < 0) {
             goto fail;
@@ -431,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
     if (refcount_table_index < s->refcount_table_size) {
         uint64_t data64 = cpu_to_be64(new_block);
         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
-        ret = bdrv_pwrite_sync(bs->file,
+        ret = bdrv_pwrite_sync(bs->file->bs,
             s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
             &data64, sizeof(data64));
         if (ret < 0) {
@@ -535,7 +535,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 
     /* Write refcount blocks to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+    ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
         blocks_clusters * s->cluster_size);
     g_free(new_blocks);
     new_blocks = NULL;
@@ -549,7 +549,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+    ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
         table_size * sizeof(uint64_t));
     if (ret < 0) {
         goto fail_table;
@@ -564,8 +564,9 @@ static int alloc_refcount_block(BlockDriverState *bs,
     cpu_to_be64w((uint64_t*)data, table_offset);
     cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
-        data, sizeof(data));
+    ret = bdrv_pwrite_sync(bs->file->bs,
+                           offsetof(QCowHeader, refcount_table_offset),
+                           data, sizeof(data));
     if (ret < 0) {
         goto fail_table;
     }
@@ -613,7 +614,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
 
         /* Discard is optional, ignore the return value */
         if (ret >= 0) {
-            bdrv_discard(bs->file,
+            bdrv_discard(bs->file->bs,
                          d->offset >> BDRV_SECTOR_BITS,
                          d->bytes >> BDRV_SECTOR_BITS);
         }
@@ -1068,7 +1069,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
         }
         l1_allocated = true;
 
-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
         if (ret < 0) {
             goto fail;
         }
@@ -1221,7 +1222,8 @@ fail:
             cpu_to_be64s(&l1_table[i]);
         }
 
-        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
+                               l1_table, l1_size2);
 
         for (i = 0; i < l1_size; i++) {
             be64_to_cpus(&l1_table[i]);
@@ -1376,7 +1378,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     l2_size = s->l2_size * sizeof(uint64_t);
     l2_table = g_malloc(l2_size);
 
-    ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
+    ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
     if (ret < 0) {
         fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
         res->check_errors++;
@@ -1508,7 +1510,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
             res->check_errors++;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
         if (ret < 0) {
             fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
             res->check_errors++;
@@ -1606,7 +1608,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
             }
         }
 
-        ret = bdrv_pread(bs->file, l2_offset, l2_table,
+        ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
                          s->l2_size * sizeof(uint64_t));
         if (ret < 0) {
             fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1658,7 +1660,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
                 goto fail;
             }
 
-            ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
+            ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
+                              s->cluster_size);
             if (ret < 0) {
                 fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
                         strerror(-ret));
@@ -1713,11 +1716,11 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                     goto resize_fail;
                 }
 
-                ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+                ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
                 if (ret < 0) {
                     goto resize_fail;
                 }
-                size = bdrv_getlength(bs->file);
+                size = bdrv_getlength(bs->file->bs);
                 if (size < 0) {
                     ret = size;
                     goto resize_fail;
@@ -2091,7 +2094,7 @@ write_refblocks:
         on_disk_refblock = (void *)((char *) *refcount_table +
                                     refblock_index * s->cluster_size);
 
-        ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
+        ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
                          on_disk_refblock, s->cluster_sectors);
         if (ret < 0) {
             fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2140,7 +2143,7 @@ write_refblocks:
     }
 
     assert(reftable_size < INT_MAX / sizeof(uint64_t));
-    ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+    ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
                       reftable_size * sizeof(uint64_t));
     if (ret < 0) {
         fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2152,8 +2155,8 @@ write_refblocks:
                  reftable_offset);
     cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
                  size_to_clusters(s, reftable_size * sizeof(uint64_t)));
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
-                                              refcount_table_offset),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
+                                                  refcount_table_offset),
                            &reftable_offset_and_clusters,
                            sizeof(reftable_offset_and_clusters));
     if (ret < 0) {
@@ -2191,7 +2194,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
     bool rebuild = false;
     int ret;
 
-    size = bdrv_getlength(bs->file);
+    size = bdrv_getlength(bs->file->bs);
     if (size < 0) {
         res->check_errors++;
         return size;
@@ -2400,7 +2403,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                 return -ENOMEM;
             }
 
-            ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
+            ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
             if (ret < 0) {
                 g_free(l1);
                 return ret;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 92f4dfc083..def720164d 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -64,7 +64,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
     for(i = 0; i < s->nb_snapshots; i++) {
         /* Read statically sized part of the snapshot header */
         offset = align_offset(offset, 8);
-        ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+        ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
         if (ret < 0) {
             goto fail;
         }
@@ -83,7 +83,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
         name_size = be16_to_cpu(h.name_size);
 
         /* Read extra data */
-        ret = bdrv_pread(bs->file, offset, &extra,
+        ret = bdrv_pread(bs->file->bs, offset, &extra,
                          MIN(sizeof(extra), extra_data_size));
         if (ret < 0) {
             goto fail;
@@ -102,7 +102,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
 
         /* Read snapshot ID */
         sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+        ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
         if (ret < 0) {
             goto fail;
         }
@@ -111,7 +111,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
 
         /* Read snapshot name */
         sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+        ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
         if (ret < 0) {
             goto fail;
         }
@@ -214,25 +214,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
         h.name_size = cpu_to_be16(name_size);
         offset = align_offset(offset, 8);
 
-        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+        ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
         if (ret < 0) {
             goto fail;
         }
         offset += sizeof(h);
 
-        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+        ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
         if (ret < 0) {
             goto fail;
         }
         offset += sizeof(extra);
 
-        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+        ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
         if (ret < 0) {
             goto fail;
         }
         offset += id_str_size;
 
-        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+        ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
         if (ret < 0) {
             goto fail;
         }
@@ -254,7 +254,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
 
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
                            &header_data, sizeof(header_data));
     if (ret < 0) {
         goto fail;
@@ -396,7 +396,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         goto fail;
     }
 
-    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+    ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
                       s->l1_size * sizeof(uint64_t));
     if (ret < 0) {
         goto fail;
@@ -509,7 +509,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+                     sn_l1_table, sn_l1_bytes);
     if (ret < 0) {
         goto fail;
     }
@@ -526,7 +527,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto fail;
     }
 
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+    ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
                            cur_l1_bytes);
     if (ret < 0) {
         goto fail;
@@ -706,13 +707,14 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
         return -EFBIG;
     }
     new_l1_bytes = sn->l1_size * sizeof(uint64_t);
-    new_l1_table = qemu_try_blockalign(bs->file,
+    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                        align_offset(new_l1_bytes, 512));
     if (new_l1_table == NULL) {
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+                     new_l1_table, new_l1_bytes);
     if (ret < 0) {
         error_setg(errp, "Failed to read l1 table for snapshot");
         qemu_vfree(new_l1_table);
diff --git a/block/qcow2.c b/block/qcow2.c
index 56ad808f6b..bacc4f2e11 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -104,7 +104,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         printf("attempting to read extended header in offset %lu\n", offset);
 #endif
 
-        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
+        ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
         if (ret < 0) {
             error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                              "pread fail from offset %" PRIu64, offset);
@@ -132,7 +132,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                            sizeof(bs->backing_format));
                 return 2;
             }
-            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
+            ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                  "Could not read format name");
@@ -148,7 +148,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         case QCOW2_EXT_MAGIC_FEATURE_TABLE:
             if (p_feature_table != NULL) {
                 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+                ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                      "Could not read table");
@@ -169,7 +169,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 uext->len = ext.len;
                 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
 
-                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+                ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                      "Could not read data");
@@ -260,12 +260,12 @@ int qcow2_mark_dirty(BlockDriverState *bs)
     }
 
     val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+    ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
                       &val, sizeof(val));
     if (ret < 0) {
         return ret;
     }
-    ret = bdrv_flush(bs->file);
+    ret = bdrv_flush(bs->file->bs);
     if (ret < 0) {
         return ret;
     }
@@ -828,7 +828,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     uint64_t ext_end;
     uint64_t l1_vm_state_index;
 
-    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read qcow2 header");
         goto fail;
@@ -903,7 +903,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     if (header.header_length > sizeof(header)) {
         s->unknown_header_fields_size = header.header_length - sizeof(header);
         s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
-        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+        ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
                          s->unknown_header_fields_size);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -1056,14 +1056,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
 
 
     if (s->l1_size > 0) {
-        s->l1_table = qemu_try_blockalign(bs->file,
+        s->l1_table = qemu_try_blockalign(bs->file->bs,
             align_offset(s->l1_size * sizeof(uint64_t), 512));
         if (s->l1_table == NULL) {
             error_setg(errp, "Could not allocate L1 table");
             ret = -ENOMEM;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+        ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
                          s->l1_size * sizeof(uint64_t));
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -1082,7 +1082,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
 
     s->cluster_cache = g_malloc(s->cluster_size);
     /* one more sector for decompressed data alignment */
-    s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+    s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
                                                     * s->cluster_size + 512);
     if (s->cluster_data == NULL) {
         error_setg(errp, "Could not allocate temporary cluster buffer");
@@ -1119,7 +1119,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
             ret = -EINVAL;
             goto fail;
         }
-        ret = bdrv_pread(bs->file, header.backing_file_offset,
+        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
                          bs->backing_file, len);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -1369,9 +1369,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
         switch (ret) {
         case QCOW2_CLUSTER_UNALLOCATED:
 
-            if (bs->backing_hd) {
+            if (bs->backing) {
                 /* read from the base image */
-                n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+                n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
                     sector_num, cur_nr_sectors);
                 if (n1 > 0) {
                     QEMUIOVector local_qiov;
@@ -1382,7 +1382,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
 
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                     qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing_hd, sector_num,
+                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
                                         n1, &local_qiov);
                     qemu_co_mutex_lock(&s->lock);
 
@@ -1429,8 +1429,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                  */
                 if (!cluster_data) {
                     cluster_data =
-                        qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
-                                                      * s->cluster_size);
+                        qemu_try_blockalign(bs->file->bs,
+                                            QCOW_MAX_CRYPT_CLUSTERS
+                                            * s->cluster_size);
                     if (cluster_data == NULL) {
                         ret = -ENOMEM;
                         goto fail;
@@ -1446,7 +1447,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file,
+            ret = bdrv_co_readv(bs->file->bs,
                                 (cluster_offset >> 9) + index_in_cluster,
                                 cur_nr_sectors, &hd_qiov);
             qemu_co_mutex_lock(&s->lock);
@@ -1543,7 +1544,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             Error *err = NULL;
             assert(s->cipher);
             if (!cluster_data) {
-                cluster_data = qemu_try_blockalign(bs->file,
+                cluster_data = qemu_try_blockalign(bs->file->bs,
                                                    QCOW_MAX_CRYPT_CLUSTERS
                                                    * s->cluster_size);
                 if (cluster_data == NULL) {
@@ -1580,7 +1581,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
                                 (cluster_offset >> 9) + index_in_cluster);
-        ret = bdrv_co_writev(bs->file,
+        ret = bdrv_co_writev(bs->file->bs,
                              (cluster_offset >> 9) + index_in_cluster,
                              cur_nr_sectors, &hd_qiov);
         qemu_co_mutex_lock(&s->lock);
@@ -1703,7 +1704,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
 
     qcow2_close(bs);
 
-    bdrv_invalidate_cache(bs->file, &local_err);
+    bdrv_invalidate_cache(bs->file->bs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -1911,7 +1912,7 @@ int qcow2_update_header(BlockDriverState *bs)
     }
 
     /* Write the new header */
-    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+    ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
     if (ret < 0) {
         goto fail;
     }
@@ -1991,7 +1992,8 @@ static int preallocate(BlockDriverState *bs)
     if (host_offset != 0) {
         uint8_t buf[BDRV_SECTOR_SIZE];
         memset(buf, 0, BDRV_SECTOR_SIZE);
-        ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
+        ret = bdrv_write(bs->file->bs,
+                         (host_offset >> BDRV_SECTOR_BITS) + num - 1,
                          buf, 1);
         if (ret < 0) {
             return ret;
@@ -2403,7 +2405,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 
     /* write updated header.size */
     offset = cpu_to_be64(offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
                            &offset, sizeof(uint64_t));
     if (ret < 0) {
         return ret;
@@ -2427,8 +2429,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
     if (nb_sectors == 0) {
         /* align end of file to a sector boundary to ease reading with
            sector based I/Os */
-        cluster_offset = bdrv_getlength(bs->file);
-        return bdrv_truncate(bs->file, cluster_offset);
+        cluster_offset = bdrv_getlength(bs->file->bs);
+        return bdrv_truncate(bs->file->bs, cluster_offset);
     }
 
     if (nb_sectors != s->cluster_sectors) {
@@ -2495,7 +2497,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
         }
 
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
         if (ret < 0) {
             goto fail;
         }
@@ -2544,7 +2546,7 @@ static int make_completely_empty(BlockDriverState *bs)
     /* After this call, neither the in-memory nor the on-disk refcount
      * information accurately describe the actual references */
 
-    ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
+    ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
                             l1_clusters * s->cluster_sectors, 0);
     if (ret < 0) {
         goto fail_broken_refcounts;
@@ -2558,7 +2560,7 @@ static int make_completely_empty(BlockDriverState *bs)
      * overwrite parts of the existing refcount and L1 table, which is not
      * an issue because the dirty flag is set, complete data loss is in fact
      * desired and partial data loss is consequently fine as well */
-    ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
+    ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
                             (2 + l1_clusters) * s->cluster_size /
                             BDRV_SECTOR_SIZE, 0);
     /* This call (even if it failed overall) may have overwritten on-disk
@@ -2578,7 +2580,7 @@ static int make_completely_empty(BlockDriverState *bs)
     cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
     cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
     cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
                            &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
     if (ret < 0) {
         goto fail_broken_refcounts;
@@ -2609,7 +2611,7 @@ static int make_completely_empty(BlockDriverState *bs)
 
     /* Enter the first refblock into the reftable */
     rt_entry = cpu_to_be64(2 * s->cluster_size);
-    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
+    ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
                            &rt_entry, sizeof(rt_entry));
     if (ret < 0) {
         goto fail_broken_refcounts;
@@ -2634,7 +2636,7 @@ static int make_completely_empty(BlockDriverState *bs)
         goto fail;
     }
 
-    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+    ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
     if (ret < 0) {
         goto fail;
     }
@@ -2769,7 +2771,7 @@ static void dump_refcounts(BlockDriverState *bs)
     int64_t nb_clusters, k, k1, size;
     int refcount;
 
-    size = bdrv_getlength(bs->file);
+    size = bdrv_getlength(bs->file->bs);
     nb_clusters = size_to_clusters(s, size);
     for(k = 0; k < nb_clusters;) {
         k1 = k;
diff --git a/block/qcow2.h b/block/qcow2.h
index d700bf1b62..351226302f 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -295,8 +295,6 @@ typedef struct BDRVQcow2State {
     char *image_backing_format;
 } BDRVQcow2State;
 
-struct QCowAIOCB;
-
 typedef struct Qcow2COWRegion {
     /**
      * Offset of the COW region in bytes from the start of the first cluster
diff --git a/block/qed-table.c b/block/qed-table.c
index 513aa872c9..f4219b8acc 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -63,7 +63,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
     read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
 
     qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+    bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
                    qiov->size / BDRV_SECTOR_SIZE,
                    qed_read_table_cb, read_table_cb);
 }
@@ -152,7 +152,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
     /* Adjust for offset into table */
     offset += start * sizeof(uint64_t);
 
-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
                     &write_table_cb->qiov,
                     write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
                     qed_write_table_cb, write_table_cb);
diff --git a/block/qed.c b/block/qed.c
index a7ff1d9c41..5ea05d4909 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -82,7 +82,7 @@ int qed_write_header_sync(BDRVQEDState *s)
     int ret;
 
     qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+    ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
     if (ret != sizeof(le)) {
         return ret;
     }
@@ -119,7 +119,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
     /* Update header */
     qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
 
-    bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+    bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
                     write_header_cb->nsectors, qed_write_header_cb,
                     write_header_cb);
 }
@@ -152,7 +152,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
     write_header_cb->iov.iov_len = len;
     qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
 
-    bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+    bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
                    qed_write_header_read_cb, write_header_cb);
 }
 
@@ -354,12 +354,6 @@ static void qed_cancel_need_check_timer(BDRVQEDState *s)
     timer_del(s->need_check_timer);
 }
 
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-    s->bs = bs;
-}
-
 static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
@@ -392,7 +386,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
     s->bs = bs;
     QSIMPLEQ_INIT(&s->allocating_write_reqs);
 
-    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+    ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
     if (ret < 0) {
         return ret;
     }
@@ -416,7 +410,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Round down file size to the last cluster */
-    file_size = bdrv_getlength(bs->file);
+    file_size = bdrv_getlength(bs->file->bs);
     if (file_size < 0) {
         return file_size;
     }
@@ -452,7 +446,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
             return -EINVAL;
         }
 
-        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+        ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
                               s->header.backing_filename_size, bs->backing_file,
                               sizeof(bs->backing_file));
         if (ret < 0) {
@@ -471,7 +465,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
      * feature is no longer valid.
      */
     if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
-        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+        !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) {
         s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
 
         ret = qed_write_header_sync(s);
@@ -480,7 +474,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         }
 
         /* From here on only known autoclear feature bits are valid */
-        bdrv_flush(bs->file);
+        bdrv_flush(bs->file->bs);
     }
 
     s->l1_table = qed_alloc_table(s);
@@ -498,7 +492,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
          * potentially inconsistent images to be opened read-only.  This can
          * aid data recovery from an otherwise inconsistent image.
          */
-        if (!bdrv_is_read_only(bs->file) &&
+        if (!bdrv_is_read_only(bs->file->bs) &&
             !(flags & BDRV_O_INCOMING)) {
             BdrvCheckResult result = {0};
 
@@ -541,7 +535,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
     bdrv_qed_detach_aio_context(bs);
 
     /* Ensure writes reach stable storage */
-    bdrv_flush(bs->file);
+    bdrv_flush(bs->file->bs);
 
     /* Clean shutdown, no check required on next open */
     if (s->header.features & QED_F_NEED_CHECK) {
@@ -772,8 +766,8 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
     /* If there is a backing file, get its length.  Treat the absence of a
      * backing file like a zero length backing file.
      */
-    if (s->bs->backing_hd) {
-        int64_t l = bdrv_getlength(s->bs->backing_hd);
+    if (s->bs->backing) {
+        int64_t l = bdrv_getlength(s->bs->backing->bs);
         if (l < 0) {
             cb(opaque, l);
             return;
@@ -802,7 +796,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
     qemu_iovec_concat(*backing_qiov, qiov, 0, size);
 
     BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-    bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
                    *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }
 
@@ -839,7 +833,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
     }
 
     BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-    bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
                     &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
                     qed_copy_from_backing_file_cb, copy_cb);
 }
@@ -1055,7 +1049,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
     QEDAIOCB *acb = opaque;
     BDRVQEDState *s = acb_to_s(acb);
 
-    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+    if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
         qed_aio_complete(acb, -EIO);
     }
 }
@@ -1081,7 +1075,7 @@ static void qed_aio_write_main(void *opaque, int ret)
     if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
         next_fn = qed_aio_next_io;
     } else {
-        if (s->bs->backing_hd) {
+        if (s->bs->backing) {
             next_fn = qed_aio_write_flush_before_l2_update;
         } else {
             next_fn = qed_aio_write_l2_update_cb;
@@ -1089,7 +1083,7 @@ static void qed_aio_write_main(void *opaque, int ret)
     }
 
     BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
                     &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                     next_fn, acb);
 }
@@ -1139,7 +1133,7 @@ static void qed_aio_write_prefill(void *opaque, int ret)
 static bool qed_should_set_need_check(BDRVQEDState *s)
 {
     /* The flush before L2 update path ensures consistency */
-    if (s->bs->backing_hd) {
+    if (s->bs->backing) {
         return false;
     }
 
@@ -1321,7 +1315,7 @@ static void qed_aio_read_data(void *opaque, int ret,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+    bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
                    qed_aio_next_io, acb);
     return;
@@ -1443,7 +1437,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
     struct iovec iov;
 
     /* Refuse if there are untouched backing file sectors */
-    if (bs->backing_hd) {
+    if (bs->backing) {
         if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
             return -ENOTSUP;
         }
@@ -1580,7 +1574,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
     }
 
     /* Write new header */
-    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+    ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
     g_free(buffer);
     if (ret == 0) {
         memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1596,7 +1590,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
 
     bdrv_qed_close(bs);
 
-    bdrv_invalidate_cache(bs->file, &local_err);
+    bdrv_invalidate_cache(bs->file->bs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -1664,7 +1658,6 @@ static BlockDriver bdrv_qed = {
     .supports_backing         = true,
 
     .bdrv_probe               = bdrv_qed_probe,
-    .bdrv_rebind              = bdrv_qed_rebind,
     .bdrv_open                = bdrv_qed_open,
     .bdrv_close               = bdrv_qed_close,
     .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
diff --git a/block/quorum.c b/block/quorum.c
index 8fe53b4272..b9ba028d46 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -64,7 +64,7 @@ typedef struct QuorumVotes {
 
 /* the following structure holds the state of one quorum instance */
 typedef struct BDRVQuorumState {
-    BlockDriverState **bs; /* children BlockDriverStates */
+    BdrvChild **children;  /* children BlockDriverStates */
     int num_children;      /* children count */
     int threshold;         /* if less than threshold children reads gave the
                             * same result a quorum error occurs.
@@ -336,7 +336,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
             continue;
         }
         QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
+            quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
         }
     }
 }
@@ -369,8 +369,9 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
             continue;
         }
         QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
-                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+            bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+                            acb);
         }
     }
 
@@ -639,13 +640,13 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+        acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
         qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
         qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
     }
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
+        bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
                        acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
     }
 
@@ -656,12 +657,12 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
 {
     BDRVQuorumState *s = acb->common.bs->opaque;
 
-    acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
-                                                     acb->qiov->size);
+    acb->qcrs[acb->child_iter].buf =
+        qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
     qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
     qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
                      acb->qcrs[acb->child_iter].buf);
-    bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
+    bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
                    &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
                    quorum_aio_cb, &acb->qcrs[acb->child_iter]);
 
@@ -702,8 +703,8 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
-                                             nb_sectors, &quorum_aio_cb,
+        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
+                                             qiov, nb_sectors, &quorum_aio_cb,
                                              &acb->qcrs[i]);
     }
 
@@ -717,12 +718,12 @@ static int64_t quorum_getlength(BlockDriverState *bs)
     int i;
 
     /* check that all file have the same length */
-    result = bdrv_getlength(s->bs[0]);
+    result = bdrv_getlength(s->children[0]->bs);
     if (result < 0) {
         return result;
     }
     for (i = 1; i < s->num_children; i++) {
-        int64_t value = bdrv_getlength(s->bs[i]);
+        int64_t value = bdrv_getlength(s->children[i]->bs);
         if (value < 0) {
             return value;
         }
@@ -741,7 +742,7 @@ static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_invalidate_cache(s->bs[i], &local_err);
+        bdrv_invalidate_cache(s->children[i]->bs, &local_err);
         if (local_err) {
             error_propagate(errp, local_err);
             return;
@@ -762,7 +763,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
     error_votes.compare = quorum_64bits_compare;
 
     for (i = 0; i < s->num_children; i++) {
-        result = bdrv_co_flush(s->bs[i]);
+        result = bdrv_co_flush(s->children[i]->bs);
         result_value.l = result;
         quorum_count_vote(&error_votes, &result_value, i);
     }
@@ -782,7 +783,7 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
+        bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
                                                      candidate);
         if (perm) {
             return true;
@@ -922,8 +923,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    /* allocate the children BlockDriverState array */
-    s->bs = g_new0(BlockDriverState *, s->num_children);
+    /* allocate the children array */
+    s->children = g_new0(BdrvChild *, s->num_children);
     opened = g_new0(bool, s->num_children);
 
     for (i = 0; i < s->num_children; i++) {
@@ -931,9 +932,10 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
         ret = snprintf(indexstr, 32, "children.%d", i);
         assert(ret < 32);
 
-        ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs,
-                              &child_format, false, &local_err);
-        if (ret < 0) {
+        s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
+                                         &child_format, false, &local_err);
+        if (local_err) {
+            ret = -EINVAL;
             goto close_exit;
         }
 
@@ -949,9 +951,9 @@ close_exit:
         if (!opened[i]) {
             continue;
         }
-        bdrv_unref(s->bs[i]);
+        bdrv_unref_child(bs, s->children[i]);
     }
-    g_free(s->bs);
+    g_free(s->children);
     g_free(opened);
 exit:
     qemu_opts_del(opts);
@@ -968,10 +970,10 @@ static void quorum_close(BlockDriverState *bs)
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_unref(s->bs[i]);
+        bdrv_unref_child(bs, s->children[i]);
     }
 
-    g_free(s->bs);
+    g_free(s->children);
 }
 
 static void quorum_detach_aio_context(BlockDriverState *bs)
@@ -980,7 +982,7 @@ static void quorum_detach_aio_context(BlockDriverState *bs)
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_detach_aio_context(s->bs[i]);
+        bdrv_detach_aio_context(s->children[i]->bs);
     }
 }
 
@@ -991,7 +993,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_attach_aio_context(s->bs[i], new_context);
+        bdrv_attach_aio_context(s->children[i]->bs, new_context);
     }
 }
 
@@ -1003,16 +1005,17 @@ static void quorum_refresh_filename(BlockDriverState *bs)
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_refresh_filename(s->bs[i]);
-        if (!s->bs[i]->full_open_options) {
+        bdrv_refresh_filename(s->children[i]->bs);
+        if (!s->children[i]->bs->full_open_options) {
             return;
         }
     }
 
     children = qlist_new();
     for (i = 0; i < s->num_children; i++) {
-        QINCREF(s->bs[i]->full_open_options);
-        qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options));
+        QINCREF(s->children[i]->bs->full_open_options);
+        qlist_append_obj(children,
+                         QOBJECT(s->children[i]->bs->full_open_options));
     }
 
     opts = qdict_new();
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cc1b8743ac..3a527f0b6b 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -519,7 +519,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
                      "future QEMU versions.\n",
                      bs->filename);
     }
-#endif
+#else
+    if (bdrv_flags & BDRV_O_NATIVE_AIO) {
+        error_printf("WARNING: aio=native was specified for '%s', but "
+                     "is not supported in this build. Falling back to "
+                     "aio=threads.\n"
+                     "         This will become an error condition in "
+                     "future QEMU versions.\n",
+                     bs->filename);
+    }
+#endif /* !defined(CONFIG_LINUX_AIO) */
 
     s->has_discard = true;
     s->has_write_zeroes = true;
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index e3d2d04681..63ee91164f 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -52,7 +52,7 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
     BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
+    return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
 }
 
 static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -75,7 +75,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
             return 0;
         }
 
-        buf = qemu_try_blockalign(bs->file, 512);
+        buf = qemu_try_blockalign(bs->file->bs, 512);
         if (!buf) {
             ret = -ENOMEM;
             goto fail;
@@ -102,7 +102,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+    ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
 
 fail:
     if (qiov == &local_qiov) {
@@ -125,58 +125,58 @@ static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
                                             int64_t sector_num, int nb_sectors,
                                             BdrvRequestFlags flags)
 {
-    return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
+    return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
 }
 
 static int coroutine_fn raw_co_discard(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors)
 {
-    return bdrv_co_discard(bs->file, sector_num, nb_sectors);
+    return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
 }
 
 static int64_t raw_getlength(BlockDriverState *bs)
 {
-    return bdrv_getlength(bs->file);
+    return bdrv_getlength(bs->file->bs);
 }
 
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
-    return bdrv_get_info(bs->file, bdi);
+    return bdrv_get_info(bs->file->bs, bdi);
 }
 
 static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl = bs->file->bl;
+    bs->bl = bs->file->bs->bl;
 }
 
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file, offset);
+    return bdrv_truncate(bs->file->bs, offset);
 }
 
 static int raw_is_inserted(BlockDriverState *bs)
 {
-    return bdrv_is_inserted(bs->file);
+    return bdrv_is_inserted(bs->file->bs);
 }
 
 static int raw_media_changed(BlockDriverState *bs)
 {
-    return bdrv_media_changed(bs->file);
+    return bdrv_media_changed(bs->file->bs);
 }
 
 static void raw_eject(BlockDriverState *bs, bool eject_flag)
 {
-    bdrv_eject(bs->file, eject_flag);
+    bdrv_eject(bs->file->bs, eject_flag);
 }
 
 static void raw_lock_medium(BlockDriverState *bs, bool locked)
 {
-    bdrv_lock_medium(bs->file, locked);
+    bdrv_lock_medium(bs->file->bs, locked);
 }
 
 static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
-    return bdrv_ioctl(bs->file, req, buf);
+    return bdrv_ioctl(bs->file->bs, req, buf);
 }
 
 static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
@@ -184,12 +184,12 @@ static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
                                  BlockCompletionFunc *cb,
                                  void *opaque)
 {
-    return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
+    return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
 }
 
 static int raw_has_zero_init(BlockDriverState *bs)
 {
-    return bdrv_has_zero_init(bs->file);
+    return bdrv_has_zero_init(bs->file->bs);
 }
 
 static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
@@ -207,7 +207,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
-    bs->sg = bs->file->sg;
+    bs->sg = bs->file->bs->sg;
 
     if (bs->probed && !bdrv_is_read_only(bs)) {
         fprintf(stderr,
@@ -217,7 +217,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                 "raw images, write operations on block 0 will be restricted.\n"
                 "         Specify the 'raw' format explicitly to remove the "
                 "restrictions.\n",
-                bs->file->filename);
+                bs->file->bs->filename);
     }
 
     return 0;
@@ -237,12 +237,12 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
 
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
-    return bdrv_probe_blocksizes(bs->file, bsz);
+    return bdrv_probe_blocksizes(bs->file->bs, bsz);
 }
 
 static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
 {
-    return bdrv_probe_geometry(bs->file, geo);
+    return bdrv_probe_geometry(bs->file->bs, geo);
 }
 
 BlockDriver bdrv_raw = {
diff --git a/block/snapshot.c b/block/snapshot.c
index 49e143e991..89500f2f18 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -149,7 +149,7 @@ int bdrv_can_snapshot(BlockDriverState *bs)
 
     if (!drv->bdrv_snapshot_create) {
         if (bs->file != NULL) {
-            return bdrv_can_snapshot(bs->file);
+            return bdrv_can_snapshot(bs->file->bs);
         }
         return 0;
     }
@@ -168,7 +168,7 @@ int bdrv_snapshot_create(BlockDriverState *bs,
         return drv->bdrv_snapshot_create(bs, sn_info);
     }
     if (bs->file) {
-        return bdrv_snapshot_create(bs->file, sn_info);
+        return bdrv_snapshot_create(bs->file->bs, sn_info);
     }
     return -ENOTSUP;
 }
@@ -188,10 +188,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
 
     if (bs->file) {
         drv->bdrv_close(bs);
-        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+        ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
         open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
         if (open_ret < 0) {
-            bdrv_unref(bs->file);
+            bdrv_unref(bs->file->bs);
             bs->drv = NULL;
             return open_ret;
         }
@@ -245,7 +245,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
         return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
     }
     if (bs->file) {
-        return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
+        return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
     }
     error_setg(errp, "Block format '%s' used by device '%s' "
                "does not support internal snapshot deletion",
@@ -283,7 +283,7 @@ int bdrv_snapshot_list(BlockDriverState *bs,
         return drv->bdrv_snapshot_list(bs, psn_info);
     }
     if (bs->file) {
-        return bdrv_snapshot_list(bs->file, psn_info);
+        return bdrv_snapshot_list(bs->file->bs, psn_info);
     }
     return -ENOTSUP;
 }
diff --git a/block/stream.c b/block/stream.c
index ab0bd057f7..3f64fa2245 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -52,34 +52,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
     return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
 }
 
-static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
-                                const char *base_id)
-{
-    BlockDriverState *intermediate;
-    intermediate = top->backing_hd;
-
-    /* Must assign before bdrv_delete() to prevent traversing dangling pointer
-     * while we delete backing image instances.
-     */
-    bdrv_set_backing_hd(top, base);
-
-    while (intermediate) {
-        BlockDriverState *unused;
-
-        /* reached base */
-        if (intermediate == base) {
-            break;
-        }
-
-        unused = intermediate;
-        intermediate = intermediate->backing_hd;
-        bdrv_set_backing_hd(unused, NULL);
-        bdrv_unref(unused);
-    }
-
-    bdrv_refresh_limits(top, NULL);
-}
-
 typedef struct {
     int ret;
     bool reached_end;
@@ -101,7 +73,7 @@ static void stream_complete(BlockJob *job, void *opaque)
             }
         }
         data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        close_unused_images(job->bs, base, base_id);
+        bdrv_set_backing_hd(job->bs, base);
     }
 
     g_free(s->backing_file_str);
@@ -121,7 +93,7 @@ static void coroutine_fn stream_run(void *opaque)
     int n = 0;
     void *buf;
 
-    if (!bs->backing_hd) {
+    if (!bs->backing) {
         block_job_completed(&s->common, 0);
         return;
     }
@@ -166,7 +138,7 @@ wait:
         } else if (ret >= 0) {
             /* Copy if allocated in the intermediate images.  Limit to the
              * known-unallocated area [sector_num, sector_num+n).  */
-            ret = bdrv_is_allocated_above(bs->backing_hd, base,
+            ret = bdrv_is_allocated_above(backing_bs(bs), base,
                                           sector_num, n, &n);
 
             /* Finish early if end of backing file has been reached */
diff --git a/block/vdi.c b/block/vdi.c
index 062a6541f8..17626d4f4e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -399,7 +399,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
 
     logout("\n");
 
-    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+    ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
     if (ret < 0) {
         goto fail;
     }
@@ -490,13 +490,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
 
     bmap_size = header.blocks_in_image * sizeof(uint32_t);
     bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
-    s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+    s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
     if (s->bmap == NULL) {
         ret = -ENOMEM;
         goto fail;
     }
 
-    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+    ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+                    bmap_size);
     if (ret < 0) {
         goto fail_free_bmap;
     }
@@ -585,7 +586,7 @@ static int vdi_co_read(BlockDriverState *bs,
             uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                               (uint64_t)bmap_entry * s->block_sectors +
                               sector_in_block;
-            ret = bdrv_read(bs->file, offset, buf, n_sectors);
+            ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
         }
         logout("%u sectors read\n", n_sectors);
 
@@ -653,7 +654,7 @@ static int vdi_co_write(BlockDriverState *bs,
              * acquire the lock and thus the padded cluster is written before
              * the other coroutines can write to the affected area. */
             qemu_co_mutex_lock(&s->write_lock);
-            ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+            ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
             qemu_co_mutex_unlock(&s->write_lock);
         } else {
             uint64_t offset = s->header.offset_data / SECTOR_SIZE +
@@ -669,7 +670,7 @@ static int vdi_co_write(BlockDriverState *bs,
              * that that write operation has returned (there may be other writes
              * in flight, but they do not concern this very operation). */
             qemu_co_mutex_unlock(&s->write_lock);
-            ret = bdrv_write(bs->file, offset, buf, n_sectors);
+            ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
         }
 
         nb_sectors -= n_sectors;
@@ -694,7 +695,7 @@ static int vdi_co_write(BlockDriverState *bs,
         assert(VDI_IS_ALLOCATED(bmap_first));
         *header = s->header;
         vdi_header_to_le(header);
-        ret = bdrv_write(bs->file, 0, block, 1);
+        ret = bdrv_write(bs->file->bs, 0, block, 1);
         g_free(block);
         block = NULL;
 
@@ -712,7 +713,7 @@ static int vdi_co_write(BlockDriverState *bs,
         base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
         logout("will write %u block map sectors starting from entry %u\n",
                n_sectors, bmap_first);
-        ret = bdrv_write(bs->file, offset, base, n_sectors);
+        ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
     }
 
     return ret;
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 47fec63c61..47ae4b1351 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -81,7 +81,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
 
     offset = log->offset + read;
 
-    ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+    ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
     if (ret < 0) {
         goto exit;
     }
@@ -141,7 +141,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
         }
         offset = log->offset + read;
 
-        ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
         if (ret < 0) {
             goto exit;
         }
@@ -191,7 +191,8 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
             /* full */
             break;
         }
-        ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+        ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
+                          VHDX_LOG_SECTOR_SIZE);
         if (ret < 0) {
             goto exit;
         }
@@ -353,7 +354,7 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
     }
 
     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
-    desc_entries = qemu_try_blockalign(bs->file,
+    desc_entries = qemu_try_blockalign(bs->file->bs,
                                        desc_sectors * VHDX_LOG_SECTOR_SIZE);
     if (desc_entries == NULL) {
         ret = -ENOMEM;
@@ -462,7 +463,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
 
     /* count is only > 1 if we are writing zeroes */
     for (i = 0; i < count; i++) {
-        ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
+        ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
                                VHDX_LOG_SECTOR_SIZE);
         if (ret < 0) {
             goto exit;
@@ -509,7 +510,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
         /* if the log shows a FlushedFileOffset larger than our current file
          * size, then that means the file has been truncated / corrupted, and
          * we must refused to open it / use it */
-        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+        if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
             ret = -EINVAL;
             goto exit;
         }
@@ -539,12 +540,12 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
                 goto exit;
             }
         }
-        if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+        if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
             new_file_size = desc_entries->hdr.last_file_offset;
             if (new_file_size % (1024*1024)) {
                 /* round up to nearest 1MB boundary */
                 new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size);
+                bdrv_truncate(bs->file->bs, new_file_size);
             }
         }
         qemu_vfree(desc_entries);
@@ -908,8 +909,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
                 .sequence_number     = s->log.sequence,
                 .descriptor_count    = sectors,
                 .reserved            = 0,
-                .flushed_file_offset = bdrv_getlength(bs->file),
-                .last_file_offset    = bdrv_getlength(bs->file),
+                .flushed_file_offset = bdrv_getlength(bs->file->bs),
+                .last_file_offset    = bdrv_getlength(bs->file->bs),
               };
 
     new_hdr.log_guid = header->log_guid;
@@ -940,7 +941,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
 
         if (i == 0 && leading_length) {
             /* partial sector at the front of the buffer */
-            ret = bdrv_pread(bs->file, file_offset, merged_sector,
+            ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
                              VHDX_LOG_SECTOR_SIZE);
             if (ret < 0) {
                 goto exit;
@@ -950,7 +951,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
             sector_write = merged_sector;
         } else if (i == sectors - 1 && trailing_length) {
             /* partial sector at the end of the buffer */
-            ret = bdrv_pread(bs->file,
+            ret = bdrv_pread(bs->file->bs,
                             file_offset,
                             merged_sector + trailing_length,
                             VHDX_LOG_SECTOR_SIZE - trailing_length);
diff --git a/block/vhdx.c b/block/vhdx.c
index d3bb1bd9d0..2fe9a5e0cf 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -375,7 +375,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
         inactive_header->log_guid = *log_guid;
     }
 
-    ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
+    ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
     if (ret < 0) {
         goto exit;
     }
@@ -427,7 +427,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
     /* We have to read the whole VHDX_HEADER_SIZE instead of
      * sizeof(VHDXHeader), because the checksum is over the whole
      * region */
-    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
+    ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
+                     VHDX_HEADER_SIZE);
     if (ret < 0) {
         goto fail;
     }
@@ -443,7 +444,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
         }
     }
 
-    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
+    ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
+                     VHDX_HEADER_SIZE);
     if (ret < 0) {
         goto fail;
     }
@@ -516,7 +518,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
      * whole block */
     buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
 
-    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
+    ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
                      VHDX_HEADER_BLOCK_SIZE);
     if (ret < 0) {
         goto fail;
@@ -629,7 +631,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
 
     buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
 
-    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
+    ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
                      VHDX_METADATA_TABLE_MAX_SIZE);
     if (ret < 0) {
         goto exit;
@@ -732,7 +734,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
         goto exit;
     }
 
-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                      s->metadata_entries.file_parameters_entry.offset
                                          + s->metadata_rt.file_offset,
                      &s->params,
@@ -767,7 +769,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
     /* determine virtual disk size, logical sector size,
      * and phys sector size */
 
-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                      s->metadata_entries.virtual_disk_size_entry.offset
                                            + s->metadata_rt.file_offset,
                      &s->virtual_disk_size,
@@ -775,7 +777,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
     if (ret < 0) {
         goto exit;
     }
-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                      s->metadata_entries.logical_sector_size_entry.offset
                                              + s->metadata_rt.file_offset,
                      &s->logical_sector_size,
@@ -783,7 +785,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
     if (ret < 0) {
         goto exit;
     }
-    ret = bdrv_pread(bs->file,
+    ret = bdrv_pread(bs->file->bs,
                      s->metadata_entries.phys_sector_size_entry.offset
                                           + s->metadata_rt.file_offset,
                      &s->physical_sector_size,
@@ -906,7 +908,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     QLIST_INIT(&s->regions);
 
     /* validate the file signature */
-    ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
+    ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
     if (ret < 0) {
         goto fail;
     }
@@ -959,13 +961,13 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* s->bat is freed in vhdx_close() */
-    s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+    s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length);
     if (s->bat == NULL) {
         ret = -ENOMEM;
         goto fail;
     }
 
-    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
+    ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
     if (ret < 0) {
         goto fail;
     }
@@ -1118,7 +1120,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
                 break;
             case PAYLOAD_BLOCK_FULLY_PRESENT:
                 qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->file,
+                ret = bdrv_co_readv(bs->file->bs,
                                     sinfo.file_offset >> BDRV_SECTOR_BITS,
                                     sinfo.sectors_avail, &hd_qiov);
                 qemu_co_mutex_lock(&s->lock);
@@ -1156,12 +1158,12 @@ exit:
 static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
                                     uint64_t *new_offset)
 {
-    *new_offset = bdrv_getlength(bs->file);
+    *new_offset = bdrv_getlength(bs->file->bs);
 
     /* per the spec, the address for a block is in units of 1MB */
     *new_offset = ROUND_UP(*new_offset, 1024 * 1024);
 
-    return bdrv_truncate(bs->file, *new_offset + s->block_size);
+    return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
 }
 
 /*
@@ -1260,7 +1262,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                 /* Queue another write of zero buffers if the underlying file
                  * does not zero-fill on file extension */
 
-                if (bdrv_has_zero_init(bs->file) == 0) {
+                if (bdrv_has_zero_init(bs->file->bs) == 0) {
                     use_zero_buffers = true;
 
                     /* zero fill the front, if any */
@@ -1327,7 +1329,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                 }
                 /* block exists, so we can just overwrite it */
                 qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_writev(bs->file,
+                ret = bdrv_co_writev(bs->file->bs,
                                     sinfo.file_offset >> BDRV_SECTOR_BITS,
                                     sectors_to_write, &hd_qiov);
                 qemu_co_mutex_lock(&s->lock);
diff --git a/block/vmdk.c b/block/vmdk.c
index be0d6401af..0effb7d91c 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -87,7 +87,7 @@ typedef struct {
 #define L2_CACHE_SIZE 16
 
 typedef struct VmdkExtent {
-    BlockDriverState *file;
+    BdrvChild *file;
     bool flat;
     bool compressed;
     bool has_marker;
@@ -222,7 +222,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
         g_free(e->l1_backup_table);
         g_free(e->type);
         if (e->file != bs->file) {
-            bdrv_unref(e->file);
+            bdrv_unref_child(bs, e->file);
         }
     }
     g_free(s->extents);
@@ -248,7 +248,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
     BDRVVmdkState *s = bs->opaque;
     int ret;
 
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
         return 0;
     }
@@ -278,7 +278,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
     BDRVVmdkState *s = bs->opaque;
     int ret;
 
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
         return ret;
     }
@@ -297,7 +297,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
         pstrcat(desc, sizeof(desc), tmp_desc);
     }
 
-    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
         return ret;
     }
@@ -308,10 +308,11 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 static int vmdk_is_cid_valid(BlockDriverState *bs)
 {
     BDRVVmdkState *s = bs->opaque;
-    BlockDriverState *p_bs = bs->backing_hd;
     uint32_t cur_pcid;
 
-    if (!s->cid_checked && p_bs) {
+    if (!s->cid_checked && bs->backing) {
+        BlockDriverState *p_bs = bs->backing->bs;
+
         cur_pcid = vmdk_read_cid(p_bs, 0);
         if (s->parent_cid != cur_pcid) {
             /* CID not valid */
@@ -340,7 +341,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
     int ret;
 
     desc[DESC_SIZE] = '\0';
-    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
         return ret;
     }
@@ -367,7 +368,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
 /* Create and append extent to the extent array. Return the added VmdkExtent
  * address. return NULL if allocation failed. */
 static int vmdk_add_extent(BlockDriverState *bs,
-                           BlockDriverState *file, bool flat, int64_t sectors,
+                           BdrvChild *file, bool flat, int64_t sectors,
                            int64_t l1_offset, int64_t l1_backup_offset,
                            uint32_t l1_size,
                            int l2_size, uint64_t cluster_sectors,
@@ -392,7 +393,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
         return -EFBIG;
     }
 
-    nb_sectors = bdrv_nb_sectors(file);
+    nb_sectors = bdrv_nb_sectors(file->bs);
     if (nb_sectors < 0) {
         return nb_sectors;
     }
@@ -439,14 +440,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
         return -ENOMEM;
     }
 
-    ret = bdrv_pread(extent->file,
+    ret = bdrv_pread(extent->file->bs,
                      extent->l1_table_offset,
                      extent->l1_table,
                      l1_size);
     if (ret < 0) {
         error_setg_errno(errp, -ret,
                          "Could not read l1 table from extent '%s'",
-                         extent->file->filename);
+                         extent->file->bs->filename);
         goto fail_l1;
     }
     for (i = 0; i < extent->l1_size; i++) {
@@ -459,14 +460,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
             ret = -ENOMEM;
             goto fail_l1;
         }
-        ret = bdrv_pread(extent->file,
+        ret = bdrv_pread(extent->file->bs,
                          extent->l1_backup_table_offset,
                          extent->l1_backup_table,
                          l1_size);
         if (ret < 0) {
             error_setg_errno(errp, -ret,
                              "Could not read l1 backup table from extent '%s'",
-                             extent->file->filename);
+                             extent->file->bs->filename);
             goto fail_l1b;
         }
         for (i = 0; i < extent->l1_size; i++) {
@@ -485,7 +486,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
 }
 
 static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
-                                 BlockDriverState *file,
+                                 BdrvChild *file,
                                  int flags, Error **errp)
 {
     int ret;
@@ -493,11 +494,11 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
     VMDK3Header header;
     VmdkExtent *extent;
 
-    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
     if (ret < 0) {
         error_setg_errno(errp, -ret,
                          "Could not read header from file '%s'",
-                         file->filename);
+                         file->bs->filename);
         return ret;
     }
     ret = vmdk_add_extent(bs, file, false,
@@ -559,7 +560,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
 }
 
 static int vmdk_open_vmdk4(BlockDriverState *bs,
-                           BlockDriverState *file,
+                           BdrvChild *file,
                            int flags, QDict *options, Error **errp)
 {
     int ret;
@@ -570,17 +571,17 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     BDRVVmdkState *s = bs->opaque;
     int64_t l1_backup_offset = 0;
 
-    ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
     if (ret < 0) {
         error_setg_errno(errp, -ret,
                          "Could not read header from file '%s'",
-                         file->filename);
+                         file->bs->filename);
         return -EINVAL;
     }
     if (header.capacity == 0) {
         uint64_t desc_offset = le64_to_cpu(header.desc_offset);
         if (desc_offset) {
-            char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
+            char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
             if (!buf) {
                 return -EINVAL;
             }
@@ -620,8 +621,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
             } QEMU_PACKED eos_marker;
         } QEMU_PACKED footer;
 
-        ret = bdrv_pread(file,
-            bs->file->total_sectors * 512 - 1536,
+        ret = bdrv_pread(file->bs,
+            bs->file->bs->total_sectors * 512 - 1536,
             &footer, sizeof(footer));
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Failed to read footer");
@@ -675,7 +676,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
         l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
     }
-    if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
+    if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
         error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
                    (int64_t)(le64_to_cpu(header.grain_offset)
                              * BDRV_SECTOR_SIZE));
@@ -739,8 +740,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
 }
 
 /* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs,
-                            BlockDriverState *file, int flags,
+static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
                             char *buf, QDict *options, Error **errp)
 {
     uint32_t magic;
@@ -773,10 +773,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
     int64_t sectors = 0;
     int64_t flat_offset;
     char *extent_path;
-    BlockDriverState *extent_file;
+    BdrvChild *extent_file;
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent;
     char extent_opt_prefix[32];
+    Error *local_err = NULL;
 
     while (*p) {
         /* parse extent line in one of below formats:
@@ -819,22 +820,22 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             !desc_file_path[0])
         {
             error_setg(errp, "Cannot use relative extent paths with VMDK "
-                       "descriptor file '%s'", bs->file->filename);
+                       "descriptor file '%s'", bs->file->bs->filename);
             return -EINVAL;
         }
 
         extent_path = g_malloc0(PATH_MAX);
         path_combine(extent_path, PATH_MAX, desc_file_path, fname);
-        extent_file = NULL;
 
         ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
         assert(ret < 32);
 
-        ret = bdrv_open_image(&extent_file, extent_path, options,
-                              extent_opt_prefix, bs, &child_file, false, errp);
+        extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
+                                      bs, &child_file, false, &local_err);
         g_free(extent_path);
-        if (ret) {
-            return ret;
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return -EINVAL;
         }
 
         /* save to extents array */
@@ -844,13 +845,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             ret = vmdk_add_extent(bs, extent_file, true, sectors,
                             0, 0, 0, 0, 0, &extent, errp);
             if (ret < 0) {
-                bdrv_unref(extent_file);
+                bdrv_unref_child(bs, extent_file);
                 return ret;
             }
             extent->flat_start_offset = flat_offset << 9;
         } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
             /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
-            char *buf = vmdk_read_desc(extent_file, 0, errp);
+            char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
             if (!buf) {
                 ret = -EINVAL;
             } else {
@@ -859,13 +860,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             }
             g_free(buf);
             if (ret) {
-                bdrv_unref(extent_file);
+                bdrv_unref_child(bs, extent_file);
                 return ret;
             }
             extent = &s->extents[s->num_extents - 1];
         } else {
             error_setg(errp, "Unsupported extent type '%s'", type);
-            bdrv_unref(extent_file);
+            bdrv_unref_child(bs, extent_file);
             return -ENOTSUP;
         }
         extent->type = g_strdup(type);
@@ -905,7 +906,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
     }
     s->create_type = g_strdup(ct);
     s->desc_offset = 0;
-    ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp);
+    ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options,
+                             errp);
 exit:
     return ret;
 }
@@ -918,7 +920,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVVmdkState *s = bs->opaque;
     uint32_t magic;
 
-    buf = vmdk_read_desc(bs->file, 0, errp);
+    buf = vmdk_read_desc(bs->file->bs, 0, errp);
     if (!buf) {
         return -EINVAL;
     }
@@ -927,7 +929,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     switch (magic) {
         case VMDK3_MAGIC:
         case VMDK4_MAGIC:
-            ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp);
+            ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
+                                   errp);
             s->desc_offset = 0x200;
             break;
         default:
@@ -1004,7 +1007,7 @@ static int get_whole_cluster(BlockDriverState *bs,
     cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
     whole_grain = qemu_blockalign(bs, cluster_bytes);
 
-    if (!bs->backing_hd) {
+    if (!bs->backing) {
         memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
         memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
                cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
@@ -1013,22 +1016,22 @@ static int get_whole_cluster(BlockDriverState *bs,
     assert(skip_end_sector <= extent->cluster_sectors);
     /* we will be here if it's first write on non-exist grain(cluster).
      * try to read from parent image, if exist */
-    if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+    if (bs->backing && !vmdk_is_cid_valid(bs)) {
         ret = VMDK_ERROR;
         goto exit;
     }
 
     /* Read backing data before skip range */
     if (skip_start_sector > 0) {
-        if (bs->backing_hd) {
-            ret = bdrv_read(bs->backing_hd, sector_num,
+        if (bs->backing) {
+            ret = bdrv_read(bs->backing->bs, sector_num,
                             whole_grain, skip_start_sector);
             if (ret < 0) {
                 ret = VMDK_ERROR;
                 goto exit;
             }
         }
-        ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+        ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
                          skip_start_sector);
         if (ret < 0) {
             ret = VMDK_ERROR;
@@ -1037,8 +1040,8 @@ static int get_whole_cluster(BlockDriverState *bs,
     }
     /* Read backing data after skip range */
     if (skip_end_sector < extent->cluster_sectors) {
-        if (bs->backing_hd) {
-            ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+        if (bs->backing) {
+            ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
                             whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
                             extent->cluster_sectors - skip_end_sector);
             if (ret < 0) {
@@ -1046,7 +1049,7 @@ static int get_whole_cluster(BlockDriverState *bs,
                 goto exit;
             }
         }
-        ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+        ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
                          whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
                          extent->cluster_sectors - skip_end_sector);
         if (ret < 0) {
@@ -1066,7 +1069,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
     offset = cpu_to_le32(offset);
     /* update L2 table */
     if (bdrv_pwrite_sync(
-                extent->file,
+                extent->file->bs,
                 ((int64_t)m_data->l2_offset * 512)
                     + (m_data->l2_index * sizeof(offset)),
                 &offset, sizeof(offset)) < 0) {
@@ -1076,7 +1079,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
     if (extent->l1_backup_table_offset != 0) {
         m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
         if (bdrv_pwrite_sync(
-                    extent->file,
+                    extent->file->bs,
                     ((int64_t)m_data->l2_offset * 512)
                         + (m_data->l2_index * sizeof(offset)),
                     &offset, sizeof(offset)) < 0) {
@@ -1166,7 +1169,7 @@ static int get_cluster_offset(BlockDriverState *bs,
     }
     l2_table = extent->l2_cache + (min_index * extent->l2_size);
     if (bdrv_pread(
-                extent->file,
+                extent->file->bs,
                 (int64_t)l2_offset * 512,
                 l2_table,
                 extent->l2_size * sizeof(uint32_t)
@@ -1320,7 +1323,7 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
         write_len = buf_len + sizeof(VmdkGrainMarker);
     }
     write_offset = cluster_offset + offset_in_cluster,
-    ret = bdrv_pwrite(extent->file, write_offset, write_buf, write_len);
+    ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
 
     write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
 
@@ -1355,7 +1358,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
 
 
     if (!extent->compressed) {
-        ret = bdrv_pread(extent->file,
+        ret = bdrv_pread(extent->file->bs,
                           cluster_offset + offset_in_cluster,
                           buf, nb_sectors * 512);
         if (ret == nb_sectors * 512) {
@@ -1369,7 +1372,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
     buf_bytes = cluster_bytes * 2;
     cluster_buf = g_malloc(buf_bytes);
     uncomp_buf = g_malloc(cluster_bytes);
-    ret = bdrv_pread(extent->file,
+    ret = bdrv_pread(extent->file->bs,
                 cluster_offset,
                 cluster_buf, buf_bytes);
     if (ret < 0) {
@@ -1431,11 +1434,11 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
         }
         if (ret != VMDK_OK) {
             /* if not allocated, try to read from parent image, if exist */
-            if (bs->backing_hd && ret != VMDK_ZEROED) {
+            if (bs->backing && ret != VMDK_ZEROED) {
                 if (!vmdk_is_cid_valid(bs)) {
                     return -EINVAL;
                 }
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
                 if (ret < 0) {
                     return ret;
                 }
@@ -2035,7 +2038,7 @@ static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
     int ret = 0;
 
     for (i = 0; i < s->num_extents; i++) {
-        err = bdrv_co_flush(s->extents[i].file);
+        err = bdrv_co_flush(s->extents[i].file->bs);
         if (err < 0) {
             ret = err;
         }
@@ -2050,7 +2053,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
     int64_t r;
     BDRVVmdkState *s = bs->opaque;
 
-    ret = bdrv_get_allocated_file_size(bs->file);
+    ret = bdrv_get_allocated_file_size(bs->file->bs);
     if (ret < 0) {
         return ret;
     }
@@ -2058,7 +2061,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
         if (s->extents[i].file == bs->file) {
             continue;
         }
-        r = bdrv_get_allocated_file_size(s->extents[i].file);
+        r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
         if (r < 0) {
             return r;
         }
@@ -2076,7 +2079,7 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
      * return 0. */
     for (i = 0; i < s->num_extents; i++) {
         if (s->extents[i].flat) {
-            if (!bdrv_has_zero_init(s->extents[i].file)) {
+            if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
                 return 0;
             }
         }
@@ -2089,7 +2092,7 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
     ImageInfo *info = g_new0(ImageInfo, 1);
 
     *info = (ImageInfo){
-        .filename         = g_strdup(extent->file->filename),
+        .filename         = g_strdup(extent->file->bs->filename),
         .format           = g_strdup(extent->type),
         .virtual_size     = extent->sectors * BDRV_SECTOR_SIZE,
         .compressed       = extent->compressed,
@@ -2135,7 +2138,9 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
                     PRId64 "\n", sector_num);
             break;
         }
-        if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
+        if (ret == VMDK_OK &&
+            cluster_offset >= bdrv_getlength(extent->file->bs))
+        {
             fprintf(stderr,
                     "ERROR: cluster offset for sector %"
                     PRId64 " points after EOF\n", sector_num);
@@ -2211,7 +2216,7 @@ static void vmdk_detach_aio_context(BlockDriverState *bs)
     int i;
 
     for (i = 0; i < s->num_extents; i++) {
-        bdrv_detach_aio_context(s->extents[i].file);
+        bdrv_detach_aio_context(s->extents[i].file->bs);
     }
 }
 
@@ -2222,7 +2227,7 @@ static void vmdk_attach_aio_context(BlockDriverState *bs,
     int i;
 
     for (i = 0; i < s->num_extents; i++) {
-        bdrv_attach_aio_context(s->extents[i].file, new_context);
+        bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
     }
 }
 
diff --git a/block/vpc.c b/block/vpc.c
index 2b3b518d1c..299d373092 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -172,14 +172,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int disk_type = VHD_DYNAMIC;
     int ret;
 
-    ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
         goto fail;
     }
 
     footer = (VHDFooter *) s->footer_buf;
     if (strncmp(footer->creator, "conectix", 8)) {
-        int64_t offset = bdrv_getlength(bs->file);
+        int64_t offset = bdrv_getlength(bs->file->bs);
         if (offset < 0) {
             ret = offset;
             goto fail;
@@ -189,7 +189,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         }
 
         /* If a fixed disk, the footer is found only at the end of the file */
-        ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
+        ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
                          HEADER_SIZE);
         if (ret < 0) {
             goto fail;
@@ -232,7 +232,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     if (disk_type == VHD_DYNAMIC) {
-        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
+        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
                          HEADER_SIZE);
         if (ret < 0) {
             goto fail;
@@ -280,7 +280,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 
         pagetable_size = (uint64_t) s->max_table_entries * 4;
 
-        s->pagetable = qemu_try_blockalign(bs->file, pagetable_size);
+        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
         if (s->pagetable == NULL) {
             ret = -ENOMEM;
             goto fail;
@@ -288,7 +288,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
 
         s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
 
-        ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable, pagetable_size);
+        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
+                         pagetable_size);
         if (ret < 0) {
             goto fail;
         }
@@ -308,7 +309,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
             }
         }
 
-        if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
+        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
             error_setg(errp, "block-vpc: free_data_block_offset points after "
                              "the end of file. The image has been truncated.");
             ret = -EINVAL;
@@ -383,7 +384,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
 
         s->last_bitmap_offset = bitmap_offset;
         memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
     }
 
     return block_offset;
@@ -401,7 +402,7 @@ static int rewrite_footer(BlockDriverState* bs)
     BDRVVPCState *s = bs->opaque;
     int64_t offset = s->free_data_block_offset;
 
-    ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
+    ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
     if (ret < 0)
         return ret;
 
@@ -436,7 +437,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
 
     // Initialize the block's bitmap
     memset(bitmap, 0xff, s->bitmap_size);
-    ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
+    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
         s->bitmap_size);
     if (ret < 0) {
         return ret;
@@ -451,7 +452,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     // Write BAT entry to disk
     bat_offset = s->bat_offset + (4 * index);
     bat_value = cpu_to_be32(s->pagetable[index]);
-    ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
+    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
     if (ret < 0)
         goto fail;
 
@@ -485,7 +486,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
 
     if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_read(bs->file, sector_num, buf, nb_sectors);
+        return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
     }
     while (nb_sectors > 0) {
         offset = get_sector_offset(bs, sector_num, 0);
@@ -499,7 +500,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
         if (offset == -1) {
             memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
         } else {
-            ret = bdrv_pread(bs->file, offset, buf,
+            ret = bdrv_pread(bs->file->bs, offset, buf,
                 sectors * BDRV_SECTOR_SIZE);
             if (ret != sectors * BDRV_SECTOR_SIZE) {
                 return -1;
@@ -534,7 +535,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
     VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
     if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+        return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
     }
     while (nb_sectors > 0) {
         offset = get_sector_offset(bs, sector_num, 1);
@@ -551,7 +552,8 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
                 return -1;
         }
 
-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
+        ret = bdrv_pwrite(bs->file->bs, offset, buf,
+                          sectors * BDRV_SECTOR_SIZE);
         if (ret != sectors * BDRV_SECTOR_SIZE) {
             return -1;
         }
@@ -878,7 +880,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
     VHDFooter *footer =  (VHDFooter *) s->footer_buf;
 
     if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_has_zero_init(bs->file);
+        return bdrv_has_zero_init(bs->file->bs);
     } else {
         return 1;
     }
diff --git a/block/vvfat.c b/block/vvfat.c
index 7ddc962436..b184eca6fc 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -985,12 +985,6 @@ static BDRVVVFATState *vvv = NULL;
 static int enable_write_target(BDRVVVFATState *s, Error **errp);
 static int is_consistent(BDRVVVFATState *s);
 
-static void vvfat_rebind(BlockDriverState *bs)
-{
-    BDRVVVFATState *s = bs->opaque;
-    s->bs = bs;
-}
-
 static QemuOptsList runtime_opts = {
     .name = "vvfat",
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -2923,6 +2917,7 @@ static BlockDriver vvfat_write_target = {
 static int enable_write_target(BDRVVVFATState *s, Error **errp)
 {
     BlockDriver *bdrv_qcow = NULL;
+    BlockDriverState *backing;
     QemuOpts *opts = NULL;
     int ret;
     int size = sector2cluster(s, s->sector_count);
@@ -2971,10 +2966,13 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
     unlink(s->qcow_filename);
 #endif
 
-    bdrv_set_backing_hd(s->bs, bdrv_new());
-    s->bs->backing_hd->drv = &vvfat_write_target;
-    s->bs->backing_hd->opaque = g_new(void *, 1);
-    *(void**)s->bs->backing_hd->opaque = s;
+    backing = bdrv_new();
+    bdrv_set_backing_hd(s->bs, backing);
+    bdrv_unref(backing);
+
+    s->bs->backing->bs->drv = &vvfat_write_target;
+    s->bs->backing->bs->opaque = g_new(void *, 1);
+    *(void**)s->bs->backing->bs->opaque = s;
 
     return 0;
 
@@ -3008,7 +3006,6 @@ static BlockDriver bdrv_vvfat = {
     .bdrv_parse_filename    = vvfat_parse_filename,
     .bdrv_file_open         = vvfat_open,
     .bdrv_close             = vvfat_close,
-    .bdrv_rebind            = vvfat_rebind,
 
     .bdrv_read              = vvfat_co_read,
     .bdrv_write             = vvfat_co_write,
diff --git a/blockdev.c b/blockdev.c
index 32b04b478c..8141b6b3da 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -411,7 +411,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
         bdrv_flags |= BDRV_O_NO_FLUSH;
     }
 
-#ifdef CONFIG_LINUX_AIO
     if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
         if (!strcmp(buf, "native")) {
             bdrv_flags |= BDRV_O_NATIVE_AIO;
@@ -422,7 +421,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
            goto early_err;
         }
     }
-#endif
 
     if ((buf = qemu_opt_get(opts, "format")) != NULL) {
         if (is_help_option(buf)) {
@@ -1546,7 +1544,7 @@ static void external_snapshot_commit(BlkTransactionState *common)
     /* We don't need (or want) to use the transactional
      * bdrv_reopen_multiple() across all the entries at once, because we
      * don't want to abort all of them if one of them fails the reopen */
-    bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR,
+    bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                 NULL);
 
     aio_context_release(state->aio_context);
@@ -2508,7 +2506,7 @@ void qmp_drive_backup(const char *device, const char *target,
     /* See if we have a backing HD we can use to create our new image
      * on top of. */
     if (sync == MIRROR_SYNC_MODE_TOP) {
-        source = bs->backing_hd;
+        source = backing_bs(bs);
         if (!source) {
             sync = MIRROR_SYNC_MODE_FULL;
         }
@@ -2716,7 +2714,7 @@ void qmp_drive_mirror(const char *device, const char *target,
     }
 
     flags = bs->open_flags | BDRV_O_RDWR;
-    source = bs->backing_hd;
+    source = backing_bs(bs);
     if (!source && sync == MIRROR_SYNC_MODE_TOP) {
         sync = MIRROR_SYNC_MODE_FULL;
     }
diff --git a/blockjob.c b/blockjob.c
index 62bb906634..d87869c24a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -54,6 +54,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
     bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
     job->driver        = driver;
+    job->id            = g_strdup(bdrv_get_device_name(bs));
     job->bs            = bs;
     job->cb            = cb;
     job->opaque        = opaque;
@@ -81,6 +82,7 @@ void block_job_release(BlockDriverState *bs)
     bs->job = NULL;
     bdrv_op_unblock_all(bs, job->blocker);
     error_free(job->blocker);
+    g_free(job->id);
     g_free(job);
 }
 
@@ -113,8 +115,7 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 void block_job_complete(BlockJob *job, Error **errp)
 {
     if (job->pause_count || job->cancelled || !job->driver->complete) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY,
-                   bdrv_get_device_name(job->bs));
+        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
         return;
     }
 
@@ -269,7 +270,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
 {
     BlockJobInfo *info = g_new0(BlockJobInfo, 1);
     info->type      = g_strdup(BlockJobType_lookup[job->driver->job_type]);
-    info->device    = g_strdup(bdrv_get_device_name(job->bs));
+    info->device    = g_strdup(job->id);
     info->len       = job->len;
     info->busy      = job->busy;
     info->paused    = job->pause_count > 0;
@@ -291,7 +292,7 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
 void block_job_event_cancelled(BlockJob *job)
 {
     qapi_event_send_block_job_cancelled(job->driver->job_type,
-                                        bdrv_get_device_name(job->bs),
+                                        job->id,
                                         job->len,
                                         job->offset,
                                         job->speed,
@@ -301,7 +302,7 @@ void block_job_event_cancelled(BlockJob *job)
 void block_job_event_completed(BlockJob *job, const char *msg)
 {
     qapi_event_send_block_job_completed(job->driver->job_type,
-                                        bdrv_get_device_name(job->bs),
+                                        job->id,
                                         job->len,
                                         job->offset,
                                         job->speed,
@@ -315,7 +316,7 @@ void block_job_event_ready(BlockJob *job)
     job->ready = true;
 
     qapi_event_send_block_job_ready(job->driver->job_type,
-                                    bdrv_get_device_name(job->bs),
+                                    job->id,
                                     job->len,
                                     job->offset,
                                     job->speed, &error_abort);
@@ -344,7 +345,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
     default:
         abort();
     }
-    qapi_event_send_block_job_error(bdrv_get_device_name(job->bs),
+    qapi_event_send_block_job_error(job->id,
                                     is_read ? IO_OPERATION_TYPE_READ :
                                     IO_OPERATION_TYPE_WRITE,
                                     action, &error_abort);
diff --git a/include/block/block.h b/include/block/block.h
index 2dd66300ed..6d70eb42fe 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -203,12 +203,11 @@ BlockDriverState *bdrv_new(void);
 void bdrv_make_anon(BlockDriverState *bs);
 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_replace_in_backing_chain(BlockDriverState *old,
+                                   BlockDriverState *new);
+
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_parse_discard_flags(const char *mode, int *flags);
-int bdrv_open_image(BlockDriverState **pbs, const char *filename,
-                    QDict *options, const char *bdref_key,
-                    BlockDriverState* parent, const BdrvChildRole *child_role,
-                    bool allow_none, Error **errp);
 BdrvChild *bdrv_open_child(const char *filename,
                            QDict *options, const char *bdref_key,
                            BlockDriverState* parent,
@@ -585,7 +584,13 @@ typedef enum {
     BLKDBG_EVENT_MAX,
 } BlkDebugEvent;
 
-#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
+#define BLKDBG_EVENT(child, evt) \
+    do { \
+        if (child) { \
+            bdrv_debug_event(child->bs, evt); \
+        } \
+    } while (0)
+
 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
 
 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 14ad4c334b..c0e65138b1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -122,7 +122,6 @@ struct BlockDriver {
     int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
                       const uint8_t *buf, int nb_sectors);
     void (*bdrv_close)(BlockDriverState *bs);
-    void (*bdrv_rebind)(BlockDriverState *bs);
     int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
     int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
     int (*bdrv_make_empty)(BlockDriverState *bs);
@@ -339,6 +338,7 @@ struct BdrvChild {
     BlockDriverState *bs;
     const BdrvChildRole *role;
     QLIST_ENTRY(BdrvChild) next;
+    QLIST_ENTRY(BdrvChild) next_parent;
 };
 
 /*
@@ -378,9 +378,8 @@ struct BlockDriverState {
     QDict *full_open_options;
     char exact_filename[PATH_MAX];
 
-    BlockDriverState *backing_hd;
-    BdrvChild *backing_child;
-    BlockDriverState *file;
+    BdrvChild *backing;
+    BdrvChild *file;
 
     NotifierList close_notifiers;
 
@@ -446,6 +445,7 @@ struct BlockDriverState {
      * parent node of this node. */
     BlockDriverState *inherits_from;
     QLIST_HEAD(, BdrvChild) children;
+    QLIST_HEAD(, BdrvChild) parents;
 
     QDict *options;
     BlockdevDetectZeroesOptions detect_zeroes;
@@ -458,6 +458,11 @@ struct BlockDriverState {
     NotifierWithReturn write_threshold_notifier;
 };
 
+static inline BlockDriverState *backing_bs(BlockDriverState *bs)
+{
+    return bs->backing ? bs->backing->bs : NULL;
+}
+
 
 /* Essential block drivers which must always be statically linked into qemu, and
  * which therefore can be accessed without using bdrv_find_format() */
@@ -496,7 +501,7 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
  *
  * May be called from .bdrv_detach_aio_context() to detach children from the
  * current #AioContext.  This is only needed by block drivers that manage their
- * own children.  Both ->file and ->backing_hd are automatically handled and
+ * own children.  Both ->file and ->backing are automatically handled and
  * block drivers should not call this function on them explicitly.
  */
 void bdrv_detach_aio_context(BlockDriverState *bs);
@@ -506,7 +511,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs);
  *
  * May be called from .bdrv_attach_aio_context() to attach children to the new
  * #AioContext.  This is only needed by block drivers that manage their own
- * children.  Both ->file and ->backing_hd are automatically handled and block
+ * children.  Both ->file and ->backing are automatically handled and block
  * drivers should not call this function on them explicitly.
  */
 void bdrv_attach_aio_context(BlockDriverState *bs,
@@ -655,6 +660,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   BlockCompletionFunc *cb, void *opaque,
                   Error **errp);
 
+void blk_set_bs(BlockBackend *blk, BlockDriverState *bs);
+
 void blk_dev_change_media_cb(BlockBackend *blk, bool load);
 bool blk_dev_has_removable_media(BlockBackend *blk);
 void blk_dev_eject_request(BlockBackend *blk, bool force);
@@ -663,5 +670,6 @@ bool blk_dev_is_medium_locked(BlockBackend *blk);
 void blk_dev_resize_cb(BlockBackend *blk);
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+bool bdrv_requests_pending(BlockDriverState *bs);
 
 #endif /* BLOCK_INT_H */
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index dd9d5e6aad..289b13f0c0 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -65,6 +65,14 @@ struct BlockJob {
     BlockDriverState *bs;
 
     /**
+     * The ID of the block job. Currently the BlockBackend name of the BDS
+     * owning the job at the time when the job is started.
+     *
+     * TODO Decouple block job IDs from BlockBackend names
+     */
+    char *id;
+
+    /**
      * The coroutine that executes the job.  If not NULL, it is
      * reentered when busy is false and the job is cancelled.
      */
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 0bd212b218..2f74540a87 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -246,6 +246,14 @@ int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end,
 #define STR_OR_NULL(str) ((str) ? (str) : "null")
 
 /* id.c */
+
+typedef enum IdSubSystems {
+    ID_QDEV,
+    ID_BLOCK,
+    ID_MAX      /* last element, used as array size */
+} IdSubSystems;
+
+char *id_generate(IdSubSystems id);
 bool id_wellformed(const char *id);
 
 /* path.c */
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index a8d3cb8e63..f781aa20a8 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -117,12 +117,6 @@ struct {                                                                \
         }                                                               \
 } while (/*CONSTCOND*/0)
 
-#define QLIST_FIX_HEAD_PTR(head, field) do {                            \
-        if ((head)->lh_first != NULL) {                                 \
-            (head)->lh_first->field.le_prev = &(head)->lh_first;        \
-        }                                                               \
-} while (/*CONSTCOND*/0)
-
 #define QLIST_INSERT_AFTER(listelm, elm, field) do {                    \
         if (((elm)->field.le_next = (listelm)->field.le_next) != NULL)  \
                 (listelm)->field.le_next->field.le_prev =               \
diff --git a/qemu-img.c b/qemu-img.c
index 7d65c0a78c..3025776e14 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -748,7 +748,7 @@ static int img_commit(int argc, char **argv)
         /* This is different from QMP, which by default uses the deepest file in
          * the backing chain (i.e., the very base); however, the traditional
          * behavior of qemu-img commit is using the immediate backing file. */
-        base_bs = bs->backing_hd;
+        base_bs = backing_bs(bs);
         if (!base_bs) {
             error_setg(&local_err, "Image does not have a backing file");
             goto done;
@@ -766,12 +766,12 @@ static int img_commit(int argc, char **argv)
         goto done;
     }
 
-    /* The block job will swap base_bs and bs (which is not what we really want
-     * here, but okay) and unref base_bs (after the swap, i.e., the old top
-     * image). In order to still be able to empty that top image afterwards,
-     * increment the reference counter here preemptively. */
+    /* When the block job completes, the BlockBackend reference will point to
+     * the old backing file. In order to avoid that the top image is already
+     * deleted, so we can still empty it afterwards, increment the reference
+     * counter here preemptively. */
     if (!drop) {
-        bdrv_ref(base_bs);
+        bdrv_ref(bs);
     }
 
     run_block_job(bs->job, &local_err);
@@ -779,8 +779,8 @@ static int img_commit(int argc, char **argv)
         goto unref_backing;
     }
 
-    if (!drop && base_bs->drv->bdrv_make_empty) {
-        ret = base_bs->drv->bdrv_make_empty(base_bs);
+    if (!drop && bs->drv->bdrv_make_empty) {
+        ret = bs->drv->bdrv_make_empty(bs);
         if (ret) {
             error_setg_errno(&local_err, -ret, "Could not empty %s",
                              filename);
@@ -790,7 +790,7 @@ static int img_commit(int argc, char **argv)
 
 unref_backing:
     if (!drop) {
-        bdrv_unref(base_bs);
+        bdrv_unref(bs);
     }
 
 done:
@@ -2207,7 +2207,7 @@ static int get_block_status(BlockDriverState *bs, int64_t sector_num,
         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
             break;
         }
-        bs = bs->backing_hd;
+        bs = backing_bs(bs);
         if (bs == NULL) {
             ret = 0;
             break;
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 6428c15403..422a607bdd 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -98,9 +98,7 @@ static void usage(const char *name)
 "                            '[ID_OR_NAME]'\n"
 "  -n, --nocache             disable host cache\n"
 "      --cache=MODE          set cache mode (none, writeback, ...)\n"
-#ifdef CONFIG_LINUX_AIO
 "      --aio=MODE            set AIO mode (native or threads)\n"
-#endif
 "      --discard=MODE        set discard mode (ignore, unmap)\n"
 "      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
 "\n"
@@ -412,9 +410,7 @@ int main(int argc, char **argv)
         { "load-snapshot", 1, NULL, 'l' },
         { "nocache", 0, NULL, 'n' },
         { "cache", 1, NULL, QEMU_NBD_OPT_CACHE },
-#ifdef CONFIG_LINUX_AIO
         { "aio", 1, NULL, QEMU_NBD_OPT_AIO },
-#endif
         { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD },
         { "detect-zeroes", 1, NULL, QEMU_NBD_OPT_DETECT_ZEROES },
         { "shared", 1, NULL, 'e' },
@@ -432,9 +428,7 @@ int main(int argc, char **argv)
     int fd;
     bool seen_cache = false;
     bool seen_discard = false;
-#ifdef CONFIG_LINUX_AIO
     bool seen_aio = false;
-#endif
     pthread_t client_thread;
     const char *fmt = NULL;
     Error *local_err = NULL;
@@ -467,7 +461,6 @@ int main(int argc, char **argv)
                 errx(EXIT_FAILURE, "Invalid cache mode `%s'", optarg);
             }
             break;
-#ifdef CONFIG_LINUX_AIO
         case QEMU_NBD_OPT_AIO:
             if (seen_aio) {
                 errx(EXIT_FAILURE, "--aio can only be specified once");
@@ -481,7 +474,6 @@ int main(int argc, char **argv)
                errx(EXIT_FAILURE, "invalid aio mode `%s'", optarg);
             }
             break;
-#endif
         case QEMU_NBD_OPT_DISCARD:
             if (seen_discard) {
                 errx(EXIT_FAILURE, "--discard can only be specified once");
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d2ba800d5e..2b52980cfc 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1270,11 +1270,22 @@ SQMP
 transaction
 -----------
 
-Atomically operate on one or more block devices.  The only supported operations
-for now are drive-backup, internal and external snapshotting.  A list of
-dictionaries is accepted, that contains the actions to be performed.
-If there is any failure performing any of the operations, all operations
-for the group are abandoned.
+Atomically operate on one or more block devices.  Operations that are
+currently supported:
+
+    - drive-backup
+    - blockdev-backup
+    - blockdev-snapshot-sync
+    - blockdev-snapshot-internal-sync
+    - abort
+    - block-dirty-bitmap-add
+    - block-dirty-bitmap-clear
+
+Refer to the qemu/qapi-schema.json file for minimum required QEMU
+versions for these operations.  A list of dictionaries is accepted,
+that contains the actions to be performed.  If there is any failure
+performing any of the operations, all operations for the group are
+abandoned.
 
 For external snapshots, the dictionary contains the device, the file to use for
 the new snapshot, and the format.  The default format, if not specified, is
@@ -1301,8 +1312,12 @@ it later with qemu-img or other command.
 Arguments:
 
 actions array:
-    - "type": the operation to perform.  The only supported
-      value is "blockdev-snapshot-sync". (json-string)
+    - "type": the operation to perform (json-string).  Possible
+              values: "drive-backup", "blockdev-backup",
+                      "blockdev-snapshot-sync",
+                      "blockdev-snapshot-internal-sync",
+                      "abort", "block-dirty-bitmap-add",
+                      "block-dirty-bitmap-clear"
     - "data": a dictionary.  The contents depend on the value
       of "type".  When "type" is "blockdev-snapshot-sync":
       - "device": device name to snapshot (json-string)
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 59c1a762a2..05b5962cee 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -780,7 +780,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
         # here we check that the last registered quorum file has not been
         # swapped out and unref
         result = self.vm.qmp('query-named-block-nodes')
-        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.assert_qmp(result, 'return[1]/file', quorum_img3)
         self.vm.shutdown()
 
     def test_cancel_after_ready(self):
@@ -799,7 +799,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
         result = self.vm.qmp('query-named-block-nodes')
         # here we check that the last registered quorum file has not been
         # swapped out and unref
-        self.assert_qmp(result, 'return[0]/file', quorum_img3)
+        self.assert_qmp(result, 'return[1]/file', quorum_img3)
         self.vm.shutdown()
         self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
                         'target image does not match source after mirroring')
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index 4a8055b673..17dbf04af4 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -108,7 +108,8 @@ echo
 echo === Overriding backing file ===
 echo
 
-echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults
+echo "info block" | run_qemu -drive file="$TEST_IMG",driver=qcow2,backing.file.filename="$TEST_IMG.orig" -nodefaults\
+                  | _filter_generated_node_ids
 
 # Drivers that don't support backing files
 run_qemu -drive file="$TEST_IMG",driver=raw,backing.file.filename="$TEST_IMG.orig"
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index 0429be296c..7765aa0bb2 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -59,7 +59,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
 Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,backing.file.filename=TEST_DIR/t.qcow2.orig -nodefaults
 QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) iininfinfoinfo info binfo blinfo bloinfo blocinfo block
-ide0-hd0: TEST_DIR/t.qcow2 (qcow2)
+ide0-hd0 (NODE_NAME): TEST_DIR/t.qcow2 (qcow2)
     Cache mode:       writeback
     Backing file:     TEST_DIR/t.qcow2.orig (chain depth: 1)
 (qemu) qququiquit
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index 67e3cf57e4..00057fef9b 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -16,17 +16,17 @@ qemu-io: can't open device TEST_DIR/t.vmdk: L1 size too big
 no file open, try 'help open'
 
 === Testing monolithicFlat creation and opening ===
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2147483648
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2147483648 subformat=monolithicFlat
 image: TEST_DIR/t.IMGFMT
 file format: IMGFMT
 virtual size: 2.0G (2147483648 bytes)
 
 === Testing monolithicFlat with zeroed_grain ===
 qemu-img: TEST_DIR/t.IMGFMT: Flat image can't enable zeroed grain
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2147483648
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2147483648 subformat=monolithicFlat
 
 === Testing big twoGbMaxExtentFlat ===
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMaxExtentFlat
 image: TEST_DIR/t.vmdk
 file format: vmdk
 virtual size: 1.0T (1073741824000 bytes)
@@ -2043,7 +2043,7 @@ RW 12582912 VMFS "dummy.IMGFMT" 1
 
 
 === Testing truncated sparse ===
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=107374182400
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=107374182400 subformat=monolithicSparse
 qemu-img: Could not open 'TEST_DIR/t.IMGFMT': File truncated, expecting at least 13172736 bytes
 
 === Converting to streamOptimized from image with small cluster size===
@@ -2054,7 +2054,7 @@ wrote 512/512 bytes at offset 10240
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 === Testing monolithicFlat with internally generated JSON file name ===
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 subformat=monolithicFlat
 qemu-io: can't open: Cannot use relative extent paths with VMDK descriptor file 'json:{"image": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}, "driver": "blkdebug", "inject-error.0.event": "read_aio"}'
 
 === Testing version 3 ===
@@ -2264,7 +2264,7 @@ read 512/512 bytes at offset 64931328
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
 === Testing 4TB monolithicFlat creation and IO ===
-Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104
+Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104 subformat=monolithicFlat
 image: TEST_DIR/iotest-version3.IMGFMT
 file format: IMGFMT
 virtual size: 4.0T (4398046511104 bytes)
diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index 1df887a01d..e191e65d5f 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -58,8 +58,8 @@ echo
 echo "=== Testing dirty version downgrade ==="
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
-$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" 2>&1 \
-    | _filter_qemu_io
+$QEMU_IO -c "write -P 0x2a 0 128k" -c flush \
+         -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 | _filter_qemu_io
 $PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "compat=0.10" "$TEST_IMG"
 $PYTHON qcow2.py "$TEST_IMG" dump-header
@@ -92,8 +92,8 @@ echo
 echo "=== Testing dirty lazy_refcounts=off ==="
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
-$QEMU_IO -c "write -P 0x2a 0 128k" -c flush -c abort "$TEST_IMG" 2>&1 \
-    | _filter_qemu_io
+$QEMU_IO -c "write -P 0x2a 0 128k" -c flush \
+         -c "sigraise $(kill -l KILL)" "$TEST_IMG" 2>&1 | _filter_qemu_io
 $PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IMG amend -o "lazy_refcounts=off" "$TEST_IMG"
 $PYTHON qcow2.py "$TEST_IMG" dump-header
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index a683f46a12..b16bea95d2 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
@@ -57,7 +57,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Aborted                 (core dumped) ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" )
+./common.config: Killed                  ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" )
 magic                     0x514649fb
 version                   3
 backing_file_offset       0x0
@@ -215,7 +215,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Aborted                 (core dumped) ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" )
+./common.config: Killed                  ( exec "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@" )
 magic                     0x514649fb
 version                   3
 backing_file_offset       0x0
diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 3e9a053ef3..3788534d67 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -48,7 +48,8 @@ function do_run_qemu()
 function run_qemu()
 {
     do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
-                          | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+                          | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' \
+                          | _filter_generated_node_ids
 }
 
 size=128M
diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out
index 5fbc881b72..27ad56fe2f 100644
--- a/tests/qemu-iotests/067.out
+++ b/tests/qemu-iotests/067.out
@@ -40,6 +40,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti
                 },
                 "iops_wr": 0,
                 "ro": false,
+                "node-name": "NODE_NAME",
                 "backing_file_depth": 0,
                 "drv": "qcow2",
                 "iops": 0,
@@ -151,6 +152,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk
                 },
                 "iops_wr": 0,
                 "ro": false,
+                "node-name": "NODE_NAME",
                 "backing_file_depth": 0,
                 "drv": "qcow2",
                 "iops": 0,
@@ -270,6 +272,7 @@ Testing:
                 },
                 "iops_wr": 0,
                 "ro": false,
+                "node-name": "NODE_NAME",
                 "backing_file_depth": 0,
                 "drv": "qcow2",
                 "iops": 0,
@@ -390,6 +393,7 @@ Testing:
                 },
                 "iops_wr": 0,
                 "ro": false,
+                "node-name": "NODE_NAME",
                 "backing_file_depth": 0,
                 "drv": "qcow2",
                 "iops": 0,
@@ -480,6 +484,7 @@ Testing:
                 },
                 "iops_wr": 0,
                 "ro": false,
+                "node-name": "NODE_NAME",
                 "backing_file_depth": 0,
                 "drv": "qcow2",
                 "iops": 0,
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index d9b042cfc7..51873ff7db 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -53,7 +53,8 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp\
+                          | _filter_qemu_io | _filter_generated_node_ids
 }
 
 test_quorum=$($QEMU_IMG --help|grep quorum)
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 692c4a4343..044793dcc5 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -30,7 +30,7 @@ Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
 QMP_VERSION
 {"return": {}}
 {"return": {}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "", "sectors-count": 20480, "sector-num": 0}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "NODE_NAME", "sectors-count": 20480, "sector-num": 0}}
 read 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}
diff --git a/tests/qemu-iotests/096 b/tests/qemu-iotests/096
new file mode 100644
index 0000000000..e34204b8ff
--- /dev/null
+++ b/tests/qemu-iotests/096
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+#
+# Test that snapshots move the throttling configuration to the active
+# layer
+#
+# Copyright (C) 2015 Igalia, S.L.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+import os
+
+class TestLiveSnapshot(iotests.QMPTestCase):
+    base_img = os.path.join(iotests.test_dir, 'base.img')
+    target_img = os.path.join(iotests.test_dir, 'target.img')
+    group = 'mygroup'
+    iops = 6000
+    iops_size = 1024
+
+    def setUp(self):
+        opts = []
+        opts.append('node-name=base')
+        opts.append('throttling.group=%s' % self.group)
+        opts.append('throttling.iops-total=%d' % self.iops)
+        opts.append('throttling.iops-size=%d' % self.iops_size)
+        iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, '100M')
+        self.vm = iotests.VM().add_drive(self.base_img, ','.join(opts))
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(self.base_img)
+        os.remove(self.target_img)
+
+    def checkConfig(self, active_layer):
+        result = self.vm.qmp('query-named-block-nodes')
+        for r in result['return']:
+            if r['node-name'] == active_layer:
+                self.assertEqual(r['group'], self.group)
+                self.assertEqual(r['iops'], self.iops)
+                self.assertEqual(r['iops_size'], self.iops_size)
+            else:
+                self.assertFalse(r.has_key('group'))
+                self.assertEqual(r['iops'], 0)
+                self.assertFalse(r.has_key('iops_size'))
+
+    def testSnapshot(self):
+        self.checkConfig('base')
+        self.vm.qmp('blockdev-snapshot-sync',
+                    node_name = 'base',
+                    snapshot_node_name = 'target',
+                    snapshot_file = self.target_img,
+                    format = iotests.imgfmt)
+        self.checkConfig('target')
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/096.out b/tests/qemu-iotests/096.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/096.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index d6d05de2da..cfdb6338aa 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -128,6 +128,11 @@ _filter_date()
         -e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z]  *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/'
 }
 
+_filter_generated_node_ids()
+{
+     sed -re 's/\#block[0-9]{3,}/NODE_NAME/'
+}
+
 # replace occurrences of the actual TEST_DIR value with TEST_DIR
 _filter_testdir()
 {
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 439b1d237d..30c784e940 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -102,6 +102,7 @@
 093 auto
 094 rw auto quick
 095 rw auto quick
+096 rw auto quick
 097 rw auto backing
 098 rw auto backing quick
 099 rw auto quick
diff --git a/util/id.c b/util/id.c
index 09b22fb8fa..bcc64d836f 100644
--- a/util/id.c
+++ b/util/id.c
@@ -26,3 +26,40 @@ bool id_wellformed(const char *id)
     }
     return true;
 }
+
+#define ID_SPECIAL_CHAR '#'
+
+static const char *const id_subsys_str[] = {
+    [ID_QDEV]  = "qdev",
+    [ID_BLOCK] = "block",
+};
+
+/*
+ *  Generates an ID of the form PREFIX SUBSYSTEM NUMBER
+ *  where:
+ *
+ *  - PREFIX is the reserved character '#'
+ *  - SUBSYSTEM identifies the subsystem creating the ID
+ *  - NUMBER is a decimal number unique within SUBSYSTEM.
+ *
+ *    Example: "#block146"
+ *
+ * Note that these IDs do not satisfy id_wellformed().
+ *
+ * The caller is responsible for freeing the returned string with g_free()
+ */
+char *id_generate(IdSubSystems id)
+{
+    static uint64_t id_counters[ID_MAX];
+    uint32_t rnd;
+
+    assert(id < ID_MAX);
+    assert(id_subsys_str[id]);
+
+    rnd = g_random_int_range(0, 100);
+
+    return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR,
+                                                        id_subsys_str[id],
+                                                        id_counters[id]++,
+                                                        rnd);
+}