summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--Makefile.objs1
-rw-r--r--block.c1024
-rw-r--r--block/backup.c7
-rw-r--r--block/blkdebug.c81
-rw-r--r--block/blkverify.c31
-rw-r--r--block/cow.c3
-rw-r--r--block/curl.c81
-rw-r--r--block/gluster.c318
-rw-r--r--block/iscsi.c67
-rw-r--r--block/mirror.c39
-rw-r--r--block/qapi.c114
-rw-r--r--block/qcow.c3
-rw-r--r--block/qcow2.c13
-rw-r--r--block/qcow2.h6
-rw-r--r--block/qed.c15
-rw-r--r--block/raw-posix.c102
-rw-r--r--block/raw-win32.c41
-rw-r--r--block/rbd.c130
-rw-r--r--block/sheepdog.c25
-rw-r--r--block/stream.c2
-rw-r--r--block/vhdx.c2
-rw-r--r--block/vmdk.c45
-rw-r--r--blockdev.c112
-rwxr-xr-xconfigure8
-rw-r--r--hmp-commands.hx5
-rw-r--r--hmp.c14
-rw-r--r--hw/audio/hda-codec.c18
-rw-r--r--hw/block/virtio-blk.c2
-rw-r--r--hw/i386/pc_piix.c4
-rw-r--r--hw/ide/core.c2
-rw-r--r--hw/scsi/scsi-bus.c2
-rw-r--r--hw/scsi/scsi-disk.c3
-rw-r--r--hw/scsi/scsi-generic.c2
-rw-r--r--hw/scsi/virtio-scsi.c6
-rw-r--r--hw/usb/Makefile.objs2
-rw-r--r--hw/usb/bus.c2
-rw-r--r--hw/usb/desc-msos.c234
-rw-r--r--hw/usb/desc.c37
-rw-r--r--hw/usb/desc.h11
-rw-r--r--hw/usb/dev-hid.c8
-rw-r--r--hw/virtio/dataplane/vring.c2
-rw-r--r--include/block/block.h44
-rw-r--r--include/block/block_int.h48
-rw-r--r--include/block/qapi.h1
-rw-r--r--include/hw/i386/pc.h9
-rw-r--r--include/hw/usb.h3
-rw-r--r--include/monitor/monitor.h2
-rw-r--r--include/qapi/qmp/qdict.h1
-rw-r--r--include/qemu-io.h3
-rw-r--r--include/qemu/config-file.h6
-rw-r--r--include/qemu/osdep.h2
-rw-r--r--include/qemu/readline.h (renamed from include/monitor/readline.h)20
-rw-r--r--monitor.c41
-rw-r--r--qapi-schema.json183
-rw-r--r--qemu-doc.texi8
-rw-r--r--qemu-img.texi19
-rw-r--r--qemu-io-cmds.c57
-rw-r--r--qemu-io.c119
-rw-r--r--qmp-commands.hx117
-rw-r--r--qobject/qdict.c91
-rw-r--r--scripts/qapi.py2
-rw-r--r--tests/Makefile2
-rw-r--r--tests/check-qdict.c156
-rw-r--r--tests/fdc-test.c5
-rw-r--r--tests/ide-test.c3
-rwxr-xr-xtests/qemu-iotests/0171
-rwxr-xr-xtests/qemu-iotests/0181
-rwxr-xr-xtests/qemu-iotests/0193
-rwxr-xr-xtests/qemu-iotests/0203
-rwxr-xr-xtests/qemu-iotests/0343
-rwxr-xr-xtests/qemu-iotests/0373
-rw-r--r--tests/qemu-iotests/051.out2
-rwxr-xr-xtests/qemu-iotests/05916
-rw-r--r--tests/qemu-iotests/059.out79
-rwxr-xr-xtests/qemu-iotests/0633
-rwxr-xr-xtests/qemu-iotests/0691
-rwxr-xr-xtests/qemu-iotests/071239
-rw-r--r--tests/qemu-iotests/071.out90
-rwxr-xr-xtests/qemu-iotests/07269
-rw-r--r--tests/qemu-iotests/072.out21
-rwxr-xr-xtests/qemu-iotests/077278
-rw-r--r--tests/qemu-iotests/077.out202
-rw-r--r--tests/qemu-iotests/common.rc28
-rw-r--r--tests/qemu-iotests/group3
-rw-r--r--trace-events1
-rw-r--r--ui/gtk.c18
-rw-r--r--util/Makefile.objs1
-rw-r--r--util/error.c8
-rw-r--r--util/oslib-posix.c23
-rw-r--r--util/oslib-win32.c19
-rw-r--r--util/qemu-config.c100
-rw-r--r--util/qemu-progress.c11
-rw-r--r--util/readline.c (renamed from readline.c)46
93 files changed, 3938 insertions, 900 deletions
diff --git a/Makefile.objs b/Makefile.objs
index 857bb53ae4..ac1d0e1c28 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -43,7 +43,6 @@ libcacard-y += libcacard/vcardt.o
 ifeq ($(CONFIG_SOFTMMU),y)
 common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
 common-obj-y += net/
-common-obj-y += readline.o
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
 common-obj-$(CONFIG_POSIX) += os-posix.o
diff --git a/block.c b/block.c
index 64e7d220c6..cb21a5fa61 100644
--- a/block.c
+++ b/block.c
@@ -32,6 +32,7 @@
 #include "sysemu/sysemu.h"
 #include "qemu/notify.h"
 #include "block/coroutine.h"
+#include "block/qapi.h"
 #include "qmp-commands.h"
 #include "qemu/timer.h"
 
@@ -69,11 +70,11 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
     BdrvRequestFlags flags);
 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                                int64_t sector_num,
@@ -90,6 +91,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
+static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
+    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
+
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
@@ -188,7 +192,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs)
  * @is_write:   is the IO a write
  */
 static void bdrv_io_limits_intercept(BlockDriverState *bs,
-                                     int nb_sectors,
+                                     unsigned int bytes,
                                      bool is_write)
 {
     /* does this io must wait */
@@ -201,9 +205,8 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
     }
 
     /* the IO will be executed, do the accounting */
-    throttle_account(&bs->throttle_state,
-                     is_write,
-                     nb_sectors * BDRV_SECTOR_SIZE);
+    throttle_account(&bs->throttle_state, is_write, bytes);
+
 
     /* if the next request must wait -> do nothing */
     if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
@@ -214,6 +217,16 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
     qemu_co_queue_next(&bs->throttled_reqs[is_write]);
 }
 
+size_t bdrv_opt_mem_align(BlockDriverState *bs)
+{
+    if (!bs || !bs->drv) {
+        /* 4k should be on the safe side */
+        return 4096;
+    }
+
+    return bs->bl.opt_mem_alignment;
+}
+
 /* check if the path starts with "<protocol>:" */
 static int path_has_protocol(const char *path)
 {
@@ -327,7 +340,7 @@ BlockDriverState *bdrv_new(const char *device_name)
     QLIST_INIT(&bs->dirty_bitmaps);
     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
     if (device_name[0] != '\0') {
-        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
+        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
     }
     bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
@@ -479,6 +492,43 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
     return ret;
 }
 
+int bdrv_refresh_limits(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+
+    memset(&bs->bl, 0, sizeof(bs->bl));
+
+    if (!drv) {
+        return 0;
+    }
+
+    /* Take some limits from the children as a default */
+    if (bs->file) {
+        bdrv_refresh_limits(bs->file);
+        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
+        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+    } else {
+        bs->bl.opt_mem_alignment = 512;
+    }
+
+    if (bs->backing_hd) {
+        bdrv_refresh_limits(bs->backing_hd);
+        bs->bl.opt_transfer_length =
+            MAX(bs->bl.opt_transfer_length,
+                bs->backing_hd->bl.opt_transfer_length);
+        bs->bl.opt_mem_alignment =
+            MAX(bs->bl.opt_mem_alignment,
+                bs->backing_hd->bl.opt_mem_alignment);
+    }
+
+    /* Then let the driver override it */
+    if (drv->bdrv_refresh_limits) {
+        return drv->bdrv_refresh_limits(bs);
+    }
+
+    return 0;
+}
+
 /*
  * Create a uniquely-named empty temporary file.
  * Return 0 upon success, otherwise a negative errno value.
@@ -732,6 +782,33 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
     return open_flags;
 }
 
+static int bdrv_assign_node_name(BlockDriverState *bs,
+                                 const char *node_name,
+                                 Error **errp)
+{
+    if (!node_name) {
+        return 0;
+    }
+
+    /* empty string node name is invalid */
+    if (node_name[0] == '\0') {
+        error_setg(errp, "Empty node name");
+        return -EINVAL;
+    }
+
+    /* takes care of avoiding duplicates node names */
+    if (bdrv_find_node(node_name)) {
+        error_setg(errp, "Duplicate node name");
+        return -EINVAL;
+    }
+
+    /* copy node name into the bs and insert it into the graph list */
+    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
+    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
+
+    return 0;
+}
+
 /*
  * Common part for opening disk images and files
  *
@@ -742,6 +819,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
 {
     int ret, open_flags;
     const char *filename;
+    const char *node_name = NULL;
     Error *local_err = NULL;
 
     assert(drv != NULL);
@@ -756,6 +834,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
 
     trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
 
+    node_name = qdict_get_try_str(options, "node-name");
+    ret = bdrv_assign_node_name(bs, node_name, errp);
+    if (ret < 0) {
+        return ret;
+    }
+    qdict_del(options, "node-name");
+
     /* bdrv_open() with directly using a protocol as drv. This layer is already
      * opened, so assign it to bs (while file becomes a closed BlockDriverState)
      * and return immediately. */
@@ -765,7 +850,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
     }
 
     bs->open_flags = flags;
-    bs->buffer_alignment = 512;
+    bs->guest_block_size = 512;
+    bs->request_alignment = 512;
     bs->zero_beyond_eof = true;
     open_flags = bdrv_open_flags(bs, flags);
     bs->read_only = !(open_flags & BDRV_O_RDWR);
@@ -833,6 +919,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
         goto free_and_fail;
     }
 
+    bdrv_refresh_limits(bs);
+    assert(bdrv_opt_mem_align(bs) != 0);
+    assert(bs->request_alignment != 0);
+
 #ifndef _WIN32
     if (bs->is_temporary) {
         assert(bs->filename[0] != '\0');
@@ -858,9 +948,10 @@ free_and_fail:
  * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
  */
 int bdrv_file_open(BlockDriverState **pbs, const char *filename,
-                   QDict *options, int flags, Error **errp)
+                   const char *reference, QDict *options, int flags,
+                   Error **errp)
 {
-    BlockDriverState *bs;
+    BlockDriverState *bs = NULL;
     BlockDriver *drv;
     const char *drvname;
     bool allow_protocol_prefix = false;
@@ -872,6 +963,24 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
         options = qdict_new();
     }
 
+    if (reference) {
+        if (filename || qdict_size(options)) {
+            error_setg(errp, "Cannot reference an existing block device with "
+                       "additional options or a new filename");
+            return -EINVAL;
+        }
+        QDECREF(options);
+
+        bs = bdrv_find(reference);
+        if (!bs) {
+            error_setg(errp, "Cannot find block device '%s'", reference);
+            return -ENODEV;
+        }
+        bdrv_ref(bs);
+        *pbs = bs;
+        return 0;
+    }
+
     bs = bdrv_new("");
     bs->options = options;
     options = qdict_clone_shallow(options);
@@ -929,14 +1038,19 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
         goto fail;
     }
 
-    ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
+    if (!drv->bdrv_file_open) {
+        ret = bdrv_open(bs, filename, options, flags, drv, &local_err);
+        options = NULL;
+    } else {
+        ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err);
+    }
     if (ret < 0) {
         error_propagate(errp, local_err);
         goto fail;
     }
 
     /* Check if any unknown options were used */
-    if (qdict_size(options) != 0) {
+    if (options && (qdict_size(options) != 0)) {
         const QDictEntry *entry = qdict_first(options);
         error_setg(errp, "Block protocol '%s' doesn't support the option '%s'",
                    drv->format_name, entry->key);
@@ -1016,12 +1130,92 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
         error_free(local_err);
         return ret;
     }
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-            bs->backing_hd->file->filename);
+
+    if (bs->backing_hd->file) {
+        pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+                bs->backing_hd->file->filename);
+    }
+
+    /* Recalculate the BlockLimits with the backing file */
+    bdrv_refresh_limits(bs);
+
     return 0;
 }
 
 /*
+ * Opens a disk image whose options are given as BlockdevRef in another block
+ * device's options.
+ *
+ * If force_raw is true, bdrv_file_open() will be used, thereby preventing any
+ * image format auto-detection. If it is false and a filename is given,
+ * bdrv_open() will be used for auto-detection.
+ *
+ * If allow_none is true, no image will be opened if filename is false and no
+ * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
+ *
+ * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
+ * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
+ * itself, all options starting with "${bdref_key}." are considered part of the
+ * BlockdevRef.
+ *
+ * The BlockdevRef will be removed from the options QDict.
+ */
+int bdrv_open_image(BlockDriverState **pbs, const char *filename,
+                    QDict *options, const char *bdref_key, int flags,
+                    bool force_raw, bool allow_none, Error **errp)
+{
+    QDict *image_options;
+    int ret;
+    char *bdref_key_dot;
+    const char *reference;
+
+    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
+    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
+    g_free(bdref_key_dot);
+
+    reference = qdict_get_try_str(options, bdref_key);
+    if (!filename && !reference && !qdict_size(image_options)) {
+        if (allow_none) {
+            ret = 0;
+        } else {
+            error_setg(errp, "A block device must be specified for \"%s\"",
+                       bdref_key);
+            ret = -EINVAL;
+        }
+        goto done;
+    }
+
+    if (filename && !force_raw) {
+        /* If a filename is given and the block driver should be detected
+           automatically (instead of using none), use bdrv_open() in order to do
+           that auto-detection. */
+        BlockDriverState *bs;
+
+        if (reference) {
+            error_setg(errp, "Cannot reference an existing block device while "
+                       "giving a filename");
+            ret = -EINVAL;
+            goto done;
+        }
+
+        bs = bdrv_new("");
+        ret = bdrv_open(bs, filename, image_options, flags, NULL, errp);
+        if (ret < 0) {
+            bdrv_unref(bs);
+        } else {
+            *pbs = bs;
+        }
+    } else {
+        ret = bdrv_file_open(pbs, filename, reference, image_options, flags,
+                             errp);
+    }
+
+done:
+    qdict_del(options, bdref_key);
+    return ret;
+}
+
+/*
  * Opens a disk image (raw, qcow2, vmdk, ...)
  *
  * options is a QDict of options to pass to the block drivers, or NULL for an
@@ -1036,7 +1230,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char tmp_filename[PATH_MAX + 1];
     BlockDriverState *file = NULL;
-    QDict *file_options = NULL;
     const char *drvname;
     Error *local_err = NULL;
 
@@ -1122,10 +1315,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
         flags |= BDRV_O_ALLOW_RDWR;
     }
 
-    qdict_extract_subqdict(options, &file_options, "file.");
-
-    ret = bdrv_file_open(&file, filename, file_options,
-                         bdrv_open_flags(bs, flags | BDRV_O_UNMAP), &local_err);
+    ret = bdrv_open_image(&file, filename, options, "file",
+                          bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true,
+                          &local_err);
     if (ret < 0) {
         goto fail;
     }
@@ -1143,7 +1335,13 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
     }
 
     if (!drv) {
-        ret = find_image_format(file, filename, &drv, &local_err);
+        if (file) {
+            ret = find_image_format(file, filename, &drv, &local_err);
+        } else {
+            error_setg(errp, "Must specify either driver or file");
+            ret = -EINVAL;
+            goto unlink_and_fail;
+        }
     }
 
     if (!drv) {
@@ -1156,7 +1354,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
         goto unlink_and_fail;
     }
 
-    if (bs->file != file) {
+    if (file && (bs->file != file)) {
         bdrv_unref(file);
         file = NULL;
     }
@@ -1427,6 +1625,8 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
     reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
                                               BDRV_O_CACHE_WB);
     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+
+    bdrv_refresh_limits(reopen_state->bs);
 }
 
 /*
@@ -1501,7 +1701,7 @@ void bdrv_close_all(void)
 {
     BlockDriverState *bs;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         bdrv_close(bs);
     }
 }
@@ -1530,7 +1730,7 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
 static bool bdrv_requests_pending_all(void)
 {
     BlockDriverState *bs;
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         if (bdrv_requests_pending(bs)) {
             return true;
         }
@@ -1557,7 +1757,7 @@ void bdrv_drain_all(void)
     BlockDriverState *bs;
 
     while (busy) {
-        QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
             bdrv_start_throttled_reqs(bs);
         }
 
@@ -1566,14 +1766,19 @@ void bdrv_drain_all(void)
     }
 }
 
-/* make a BlockDriverState anonymous by removing from bdrv_state list.
+/* make a BlockDriverState anonymous by removing from bdrv_state and
+ * graph_bdrv_state list.
    Also, NULL terminate the device_name to prevent double remove */
 void bdrv_make_anon(BlockDriverState *bs)
 {
     if (bs->device_name[0] != '\0') {
-        QTAILQ_REMOVE(&bdrv_states, bs, list);
+        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
     }
     bs->device_name[0] = '\0';
+    if (bs->node_name[0] != '\0') {
+        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
+    }
+    bs->node_name[0] = '\0';
 }
 
 static void bdrv_rebind(BlockDriverState *bs)
@@ -1593,7 +1798,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     bs_dest->dev_ops            = bs_src->dev_ops;
     bs_dest->dev_opaque         = bs_src->dev_opaque;
     bs_dest->dev                = bs_src->dev;
-    bs_dest->buffer_alignment   = bs_src->buffer_alignment;
+    bs_dest->guest_block_size   = bs_src->guest_block_size;
     bs_dest->copy_on_read       = bs_src->copy_on_read;
 
     bs_dest->enable_write_cache = bs_src->enable_write_cache;
@@ -1627,7 +1832,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     /* keep the same entry in bdrv_states */
     pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
             bs_src->device_name);
-    bs_dest->list = bs_src->list;
+    bs_dest->device_list = bs_src->device_list;
+
+    /* keep the same entry in graph_bdrv_states
+     * We do want to swap name but don't want to swap linked list entries
+     */
+    bs_dest->node_list   = bs_src->node_list;
 }
 
 /*
@@ -1745,7 +1955,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev)
     bs->dev = NULL;
     bs->dev_ops = NULL;
     bs->dev_opaque = NULL;
-    bs->buffer_alignment = 512;
+    bs->guest_block_size = 512;
 }
 
 /* TODO change to return DeviceState * when all users are qdevified */
@@ -1876,10 +2086,10 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
 int bdrv_commit(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
-    int64_t sector, total_sectors;
+    int64_t sector, total_sectors, length, backing_length;
     int n, ro, open_flags;
     int ret = 0;
-    uint8_t *buf;
+    uint8_t *buf = NULL;
     char filename[PATH_MAX];
 
     if (!drv)
@@ -1904,7 +2114,29 @@ int bdrv_commit(BlockDriverState *bs)
         }
     }
 
-    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        ret = length;
+        goto ro_cleanup;
+    }
+
+    backing_length = bdrv_getlength(bs->backing_hd);
+    if (backing_length < 0) {
+        ret = backing_length;
+        goto ro_cleanup;
+    }
+
+    /* If our top snapshot is larger than the backing file image,
+     * grow the backing file image if possible.  If not possible,
+     * we must return an error */
+    if (length > backing_length) {
+        ret = bdrv_truncate(bs->backing_hd, length);
+        if (ret < 0) {
+            goto ro_cleanup;
+        }
+    }
+
+    total_sectors = length >> BDRV_SECTOR_BITS;
     buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
 
     for (sector = 0; sector < total_sectors; sector += n) {
@@ -1913,13 +2145,13 @@ int bdrv_commit(BlockDriverState *bs)
             goto ro_cleanup;
         }
         if (ret) {
-            if (bdrv_read(bs, sector, buf, n) != 0) {
-                ret = -EIO;
+            ret = bdrv_read(bs, sector, buf, n);
+            if (ret < 0) {
                 goto ro_cleanup;
             }
 
-            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
-                ret = -EIO;
+            ret = bdrv_write(bs->backing_hd, sector, buf, n);
+            if (ret < 0) {
                 goto ro_cleanup;
             }
         }
@@ -1927,6 +2159,9 @@ int bdrv_commit(BlockDriverState *bs)
 
     if (drv->bdrv_make_empty) {
         ret = drv->bdrv_make_empty(bs);
+        if (ret < 0) {
+            goto ro_cleanup;
+        }
         bdrv_flush(bs);
     }
 
@@ -1934,9 +2169,11 @@ int bdrv_commit(BlockDriverState *bs)
      * Make sure all data we wrote to the backing device is actually
      * stable on disk.
      */
-    if (bs->backing_hd)
+    if (bs->backing_hd) {
         bdrv_flush(bs->backing_hd);
+    }
 
+    ret = 0;
 ro_cleanup:
     g_free(buf);
 
@@ -1952,7 +2189,7 @@ int bdrv_commit_all(void)
 {
     BlockDriverState *bs;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         if (bs->drv && bs->backing_hd) {
             int ret = bdrv_commit(bs);
             if (ret < 0) {
@@ -1970,6 +2207,10 @@ int bdrv_commit_all(void)
  */
 static void tracked_request_end(BdrvTrackedRequest *req)
 {
+    if (req->serialising) {
+        req->bs->serialising_in_flight--;
+    }
+
     QLIST_REMOVE(req, list);
     qemu_co_queue_restart_all(&req->wait_queue);
 }
@@ -1979,15 +2220,18 @@ static void tracked_request_end(BdrvTrackedRequest *req)
  */
 static void tracked_request_begin(BdrvTrackedRequest *req,
                                   BlockDriverState *bs,
-                                  int64_t sector_num,
-                                  int nb_sectors, bool is_write)
+                                  int64_t offset,
+                                  unsigned int bytes, bool is_write)
 {
     *req = (BdrvTrackedRequest){
         .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .is_write = is_write,
-        .co = qemu_coroutine_self(),
+        .offset         = offset,
+        .bytes          = bytes,
+        .is_write       = is_write,
+        .co             = qemu_coroutine_self(),
+        .serialising    = false,
+        .overlap_offset = offset,
+        .overlap_bytes  = bytes,
     };
 
     qemu_co_queue_init(&req->wait_queue);
@@ -1995,6 +2239,21 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
 }
 
+static void mark_request_serialising(BdrvTrackedRequest *req, size_t align)
+{
+    int64_t overlap_offset = req->offset & ~(align - 1);
+    int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
+                      - overlap_offset;
+
+    if (!req->serialising) {
+        req->bs->serialising_in_flight++;
+        req->serialising = true;
+    }
+
+    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
+    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
+}
+
 /**
  * Round a region to cluster boundaries
  */
@@ -2016,53 +2275,75 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
     }
 }
 
+static int bdrv_get_cluster_size(BlockDriverState *bs)
+{
+    BlockDriverInfo bdi;
+    int ret;
+
+    ret = bdrv_get_info(bs, &bdi);
+    if (ret < 0 || bdi.cluster_size == 0) {
+        return bs->request_alignment;
+    } else {
+        return bdi.cluster_size;
+    }
+}
+
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
-                                     int64_t sector_num, int nb_sectors) {
+                                     int64_t offset, unsigned int bytes)
+{
     /*        aaaa   bbbb */
-    if (sector_num >= req->sector_num + req->nb_sectors) {
+    if (offset >= req->overlap_offset + req->overlap_bytes) {
         return false;
     }
     /* bbbb   aaaa        */
-    if (req->sector_num >= sector_num + nb_sectors) {
+    if (req->overlap_offset >= offset + bytes) {
         return false;
     }
     return true;
 }
 
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors)
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
 {
+    BlockDriverState *bs = self->bs;
     BdrvTrackedRequest *req;
-    int64_t cluster_sector_num;
-    int cluster_nb_sectors;
     bool retry;
+    bool waited = false;
 
-    /* If we touch the same cluster it counts as an overlap.  This guarantees
-     * that allocating writes will be serialized and not race with each other
-     * for the same cluster.  For example, in copy-on-read it ensures that the
-     * CoR read and write operations are atomic and guest writes cannot
-     * interleave between them.
-     */
-    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
-                           &cluster_sector_num, &cluster_nb_sectors);
+    if (!bs->serialising_in_flight) {
+        return false;
+    }
 
     do {
         retry = false;
         QLIST_FOREACH(req, &bs->tracked_requests, list) {
-            if (tracked_request_overlaps(req, cluster_sector_num,
-                                         cluster_nb_sectors)) {
+            if (req == self || (!req->serialising && !self->serialising)) {
+                continue;
+            }
+            if (tracked_request_overlaps(req, self->overlap_offset,
+                                         self->overlap_bytes))
+            {
                 /* Hitting this means there was a reentrant request, for
                  * example, a block driver issuing nested requests.  This must
                  * never happen since it means deadlock.
                  */
                 assert(qemu_coroutine_self() != req->co);
 
-                qemu_co_queue_wait(&req->wait_queue);
-                retry = true;
-                break;
+                /* If the request is already (indirectly) waiting for us, or
+                 * will wait for us as soon as it wakes up, then just go on
+                 * (instead of producing a deadlock in the former case). */
+                if (!req->waiting_for) {
+                    self->waiting_for = req;
+                    qemu_co_queue_wait(&req->wait_queue);
+                    self->waiting_for = NULL;
+                    retry = true;
+                    waited = true;
+                    break;
+                }
             }
         }
     } while (retry);
+
+    return waited;
 }
 
 /*
@@ -2224,6 +2505,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
     }
     new_top_bs->backing_hd = base_bs;
 
+    bdrv_refresh_limits(new_top_bs);
 
     QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
         /* so that bdrv_close() does not recursively close the chain */
@@ -2271,8 +2553,7 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
 
 typedef struct RwCo {
     BlockDriverState *bs;
-    int64_t sector_num;
-    int nb_sectors;
+    int64_t offset;
     QEMUIOVector *qiov;
     bool is_write;
     int ret;
@@ -2284,34 +2565,32 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
     RwCo *rwco = opaque;
 
     if (!rwco->is_write) {
-        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
-                                     rwco->nb_sectors, rwco->qiov,
-                                     rwco->flags);
-    } else {
-        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
-                                      rwco->nb_sectors, rwco->qiov,
+        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
+                                      rwco->qiov->size, rwco->qiov,
                                       rwco->flags);
+    } else {
+        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
+                                       rwco->qiov->size, rwco->qiov,
+                                       rwco->flags);
     }
 }
 
 /*
  * Process a vectored synchronous request using coroutines
  */
-static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
-                       QEMUIOVector *qiov, bool is_write,
-                       BdrvRequestFlags flags)
+static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+                        QEMUIOVector *qiov, bool is_write,
+                        BdrvRequestFlags flags)
 {
     Coroutine *co;
     RwCo rwco = {
         .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
+        .offset = offset,
         .qiov = qiov,
         .is_write = is_write,
         .ret = NOT_DONE,
         .flags = flags,
     };
-    assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
 
     /**
      * In sync call context, when the vcpu is blocked, this throttling timer
@@ -2350,7 +2629,8 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
     };
 
     qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
+    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+                        &qiov, is_write, flags);
 }
 
 /* return < 0 if error. See bdrv_write() for the return codes */
@@ -2386,11 +2666,6 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
     return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
 }
 
-int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
-{
-    return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
-}
-
 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                       int nb_sectors, BdrvRequestFlags flags)
 {
@@ -2440,117 +2715,53 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
     }
 }
 
-int bdrv_pread(BlockDriverState *bs, int64_t offset,
-               void *buf, int count1)
+int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
 {
-    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
-    int len, nb_sectors, count;
-    int64_t sector_num;
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = bytes,
+    };
     int ret;
 
-    count = count1;
-    /* first read to align to sector start */
-    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
-    if (len > count)
-        len = count;
-    sector_num = offset >> BDRV_SECTOR_BITS;
-    if (len > 0) {
-        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
-            return ret;
-        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
-        count -= len;
-        if (count == 0)
-            return count1;
-        sector_num++;
-        buf += len;
-    }
-
-    /* read the sectors "in place" */
-    nb_sectors = count >> BDRV_SECTOR_BITS;
-    if (nb_sectors > 0) {
-        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
-            return ret;
-        sector_num += nb_sectors;
-        len = nb_sectors << BDRV_SECTOR_BITS;
-        buf += len;
-        count -= len;
+    if (bytes < 0) {
+        return -EINVAL;
     }
 
-    /* add data from the last sector */
-    if (count > 0) {
-        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
-            return ret;
-        memcpy(buf, tmp_buf, count);
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
+    if (ret < 0) {
+        return ret;
     }
-    return count1;
+
+    return bytes;
 }
 
 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
 {
-    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
-    int len, nb_sectors, count;
-    int64_t sector_num;
     int ret;
 
-    count = qiov->size;
-
-    /* first write to align to sector start */
-    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
-    if (len > count)
-        len = count;
-    sector_num = offset >> BDRV_SECTOR_BITS;
-    if (len > 0) {
-        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
-            return ret;
-        qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
-                          len);
-        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
-            return ret;
-        count -= len;
-        if (count == 0)
-            return qiov->size;
-        sector_num++;
-    }
-
-    /* write the sectors "in place" */
-    nb_sectors = count >> BDRV_SECTOR_BITS;
-    if (nb_sectors > 0) {
-        QEMUIOVector qiov_inplace;
-
-        qemu_iovec_init(&qiov_inplace, qiov->niov);
-        qemu_iovec_concat(&qiov_inplace, qiov, len,
-                          nb_sectors << BDRV_SECTOR_BITS);
-        ret = bdrv_writev(bs, sector_num, &qiov_inplace);
-        qemu_iovec_destroy(&qiov_inplace);
-        if (ret < 0) {
-            return ret;
-        }
-
-        sector_num += nb_sectors;
-        len = nb_sectors << BDRV_SECTOR_BITS;
-        count -= len;
+    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+    if (ret < 0) {
+        return ret;
     }
 
-    /* add data from the last sector */
-    if (count > 0) {
-        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
-            return ret;
-        qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
-        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
-            return ret;
-    }
     return qiov->size;
 }
 
 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
-                const void *buf, int count1)
+                const void *buf, int bytes)
 {
     QEMUIOVector qiov;
     struct iovec iov = {
         .iov_base   = (void *) buf,
-        .iov_len    = count1,
+        .iov_len    = bytes,
     };
 
+    if (bytes < 0) {
+        return -EINVAL;
+    }
+
     qemu_iovec_init_external(&qiov, &iov, 1);
     return bdrv_pwritev(bs, offset, &qiov);
 }
@@ -2646,40 +2857,34 @@ err:
 }
 
 /*
- * Handle a read request in coroutine context
+ * Forwards an already correctly aligned request to the BlockDriver. This
+ * handles copy on read and zeroing after EOF; any other features must be
+ * implemented by the caller.
  */
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
+static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    int64_t align, QEMUIOVector *qiov, int flags)
 {
     BlockDriver *drv = bs->drv;
-    BdrvTrackedRequest req;
     int ret;
 
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
-        return -EIO;
-    }
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
 
-    if (bs->copy_on_read) {
-        flags |= BDRV_REQ_COPY_ON_READ;
-    }
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        bs->copy_on_read_in_flight++;
-    }
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-    if (bs->copy_on_read_in_flight) {
-        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
-    }
-
-    /* throttling disk I/O */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_intercept(bs, nb_sectors, false);
+    /* Handle Copy on Read and associated serialisation */
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        /* If we touch the same cluster it counts as an overlap.  This
+         * guarantees that allocating writes will be serialized and not race
+         * with each other for the same cluster.  For example, in copy-on-read
+         * it ensures that the CoR read and write operations are atomic and
+         * guest writes cannot interleave between them. */
+        mark_request_serialising(req, bdrv_get_cluster_size(bs));
     }
 
-    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
+    wait_serialising_requests(req);
 
     if (flags & BDRV_REQ_COPY_ON_READ) {
         int pnum;
@@ -2695,6 +2900,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         }
     }
 
+    /* Forward the request to the BlockDriver */
     if (!(bs->zero_beyond_eof && bs->growable)) {
         ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
     } else {
@@ -2708,7 +2914,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         }
 
         total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
-        max_nb_sectors = MAX(0, total_sectors - sector_num);
+        max_nb_sectors = MAX(0, ROUND_UP(total_sectors - sector_num,
+                                         align >> BDRV_SECTOR_BITS));
         if (max_nb_sectors > 0) {
             ret = drv->bdrv_co_readv(bs, sector_num,
                                      MIN(nb_sectors, max_nb_sectors), qiov);
@@ -2726,15 +2933,95 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
     }
 
 out:
+    return ret;
+}
+
+/*
+ * Handle a read request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvTrackedRequest req;
+
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
+    int ret;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (bdrv_check_byte_request(bs, offset, bytes)) {
+        return -EIO;
+    }
+
+    if (bs->copy_on_read) {
+        flags |= BDRV_REQ_COPY_ON_READ;
+    }
+
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, bytes, false);
+    }
+
+    /* Align read if necessary by padding qiov */
+    if (offset & (align - 1)) {
+        head_buf = qemu_blockalign(bs, align);
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+        tail_buf = qemu_blockalign(bs, align);
+        qemu_iovec_add(&local_qiov, tail_buf,
+                       align - ((offset + bytes) & (align - 1)));
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    tracked_request_begin(&req, bs, offset, bytes, false);
+    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+                              use_local_qiov ? &local_qiov : qiov,
+                              flags);
     tracked_request_end(&req);
 
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        bs->copy_on_read_in_flight--;
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+        qemu_vfree(head_buf);
+        qemu_vfree(tail_buf);
     }
 
     return ret;
 }
 
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
+                             nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov)
 {
@@ -2828,46 +3115,37 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 }
 
 /*
- * Handle a write request in coroutine context
+ * Forwards an already correctly aligned write request to the BlockDriver.
  */
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
+static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    QEMUIOVector *qiov, int flags)
 {
     BlockDriver *drv = bs->drv;
-    BdrvTrackedRequest req;
+    bool waited;
     int ret;
 
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-    if (bs->read_only) {
-        return -EACCES;
-    }
-    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
-        return -EIO;
-    }
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
 
-    if (bs->copy_on_read_in_flight) {
-        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
-    }
-
-    /* throttling disk I/O */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_intercept(bs, nb_sectors, true);
-    }
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
 
-    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
+    waited = wait_serialising_requests(req);
+    assert(!waited || !req->serialising);
 
-    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
     } else {
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
     }
+    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
 
     if (ret == 0 && !bs->enable_write_cache) {
         ret = bdrv_co_flush(bs);
@@ -2882,11 +3160,143 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
         bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
     }
 
+    return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    BdrvTrackedRequest req;
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
+    int ret;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+    if (bs->read_only) {
+        return -EACCES;
+    }
+    if (bdrv_check_byte_request(bs, offset, bytes)) {
+        return -EIO;
+    }
+
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, bytes, true);
+    }
+
+    /*
+     * Align write if necessary by performing a read-modify-write cycle.
+     * Pad qiov with the read parts and be sure to have a tracked request not
+     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+     */
+    tracked_request_begin(&req, bs, offset, bytes, true);
+
+    if (offset & (align - 1)) {
+        QEMUIOVector head_qiov;
+        struct iovec head_iov;
+
+        mark_request_serialising(&req, align);
+        wait_serialising_requests(&req);
+
+        head_buf = qemu_blockalign(bs, align);
+        head_iov = (struct iovec) {
+            .iov_base   = head_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+                                  align, &head_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        QEMUIOVector tail_qiov;
+        struct iovec tail_iov;
+        size_t tail_bytes;
+        bool waited;
+
+        mark_request_serialising(&req, align);
+        waited = wait_serialising_requests(&req);
+        assert(!waited || !use_local_qiov);
+
+        tail_buf = qemu_blockalign(bs, align);
+        tail_iov = (struct iovec) {
+            .iov_base   = tail_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
+                                  align, &tail_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+
+        tail_bytes = (offset + bytes) & (align - 1);
+        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+                               use_local_qiov ? &local_qiov : qiov,
+                               flags);
+
+fail:
     tracked_request_end(&req);
 
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+        qemu_vfree(head_buf);
+        qemu_vfree(tail_buf);
+    }
+
     return ret;
 }
 
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
+                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
     int nb_sectors, QEMUIOVector *qiov)
 {
@@ -3110,11 +3520,12 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
     }
 }
 
+/* This function is to find block backend bs */
 BlockDriverState *bdrv_find(const char *name)
 {
     BlockDriverState *bs;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         if (!strcmp(name, bs->device_name)) {
             return bs;
         }
@@ -3122,19 +3533,83 @@ BlockDriverState *bdrv_find(const char *name)
     return NULL;
 }
 
+/* This function is to find a node in the bs graph */
+BlockDriverState *bdrv_find_node(const char *node_name)
+{
+    BlockDriverState *bs;
+
+    assert(node_name);
+
+    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
+        if (!strcmp(node_name, bs->node_name)) {
+            return bs;
+        }
+    }
+    return NULL;
+}
+
+/* Put this QMP function here so it can access the static graph_bdrv_states. */
+BlockDeviceInfoList *bdrv_named_nodes_list(void)
+{
+    BlockDeviceInfoList *list, *entry;
+    BlockDriverState *bs;
+
+    list = NULL;
+    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
+        entry = g_malloc0(sizeof(*entry));
+        entry->value = bdrv_block_device_info(bs);
+        entry->next = list;
+        list = entry;
+    }
+
+    return list;
+}
+
+BlockDriverState *bdrv_lookup_bs(const char *device,
+                                 const char *node_name,
+                                 Error **errp)
+{
+    BlockDriverState *bs = NULL;
+
+    if ((!device && !node_name) || (device && node_name)) {
+        error_setg(errp, "Use either device or node-name but not both");
+        return NULL;
+    }
+
+    if (device) {
+        bs = bdrv_find(device);
+
+        if (!bs) {
+            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+            return NULL;
+        }
+
+        return bs;
+    }
+
+    bs = bdrv_find_node(node_name);
+
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, node_name);
+        return NULL;
+    }
+
+    return bs;
+}
+
 BlockDriverState *bdrv_next(BlockDriverState *bs)
 {
     if (!bs) {
         return QTAILQ_FIRST(&bdrv_states);
     }
-    return QTAILQ_NEXT(bs, list);
+    return QTAILQ_NEXT(bs, device_list);
 }
 
 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
 {
     BlockDriverState *bs;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         it(opaque, bs);
     }
 }
@@ -3154,7 +3629,7 @@ int bdrv_flush_all(void)
     BlockDriverState *bs;
     int result = 0;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         int ret = bdrv_flush(bs);
         if (ret < 0 && !result) {
             result = ret;
@@ -4278,7 +4753,7 @@ void bdrv_invalidate_cache_all(void)
 {
     BlockDriverState *bs;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         bdrv_invalidate_cache(bs);
     }
 }
@@ -4287,7 +4762,7 @@ void bdrv_clear_incoming_migration_all(void)
 {
     BlockDriverState *bs;
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
         bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
     }
 }
@@ -4314,9 +4789,15 @@ int bdrv_flush(BlockDriverState *bs)
     return rwco.ret;
 }
 
+typedef struct DiscardCo {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    int nb_sectors;
+    int ret;
+} DiscardCo;
 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
 {
-    RwCo *rwco = opaque;
+    DiscardCo *rwco = opaque;
 
     rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
 }
@@ -4400,7 +4881,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
 {
     Coroutine *co;
-    RwCo rwco = {
+    DiscardCo rwco = {
         .bs = bs,
         .sector_num = sector_num,
         .nb_sectors = nb_sectors,
@@ -4505,14 +4986,14 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
     return NULL;
 }
 
-void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
+void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
 {
-    bs->buffer_alignment = align;
+    bs->guest_block_size = align;
 }
 
 void *qemu_blockalign(BlockDriverState *bs, size_t size)
 {
-    return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
+    return qemu_memalign(bdrv_opt_mem_align(bs), size);
 }
 
 /*
@@ -4521,9 +5002,13 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size)
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
 {
     int i;
+    size_t alignment = bdrv_opt_mem_align(bs);
 
     for (i = 0; i < qiov->niov; i++) {
-        if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
+        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
+            return false;
+        }
+        if (qiov->iov[i].iov_len % alignment) {
             return false;
         }
     }
@@ -4875,21 +5360,68 @@ int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options)
     return bs->drv->bdrv_amend_options(bs, options);
 }
 
-ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs)
+/* Used to recurse on single child block filters.
+ * Single child block filter will store their child in bs->file.
+ */
+bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
+                                      BlockDriverState *candidate)
 {
-    if (bs->drv->bdrv_check_ext_snapshot) {
-        return bs->drv->bdrv_check_ext_snapshot(bs);
+    if (!bs->drv) {
+        return false;
     }
 
-    if (bs->file && bs->file->drv && bs->file->drv->bdrv_check_ext_snapshot) {
-        return bs->file->drv->bdrv_check_ext_snapshot(bs);
+    if (!bs->drv->authorizations[BS_IS_A_FILTER]) {
+        if (bs == candidate) {
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) {
+        return false;
+    }
+
+    if (!bs->file) {
+        return false;
     }
 
-    /* external snapshots are allowed by default */
-    return EXT_SNAPSHOT_ALLOWED;
+    return bdrv_recurse_is_first_non_filter(bs->file, candidate);
 }
 
-ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs)
+bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
+                                      BlockDriverState *candidate)
+{
+    if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) {
+        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
+    }
+
+    return bdrv_generic_is_first_non_filter(bs, candidate);
+}
+
+/* This function checks if the candidate is the first non filter bs down it's
+ * bs chain. Since we don't have pointers to parents it explore all bs chains
+ * from the top. Some filters can choose not to pass down the recursion.
+ */
+bool bdrv_is_first_non_filter(BlockDriverState *candidate)
 {
-    return EXT_SNAPSHOT_FORBIDDEN;
+    BlockDriverState *bs;
+
+    /* walk down the bs forest recursively */
+    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+        bool perm;
+
+        if (!bs->file) {
+            continue;
+        }
+
+        perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+
+        /* candidate is the first non filter */
+        if (perm) {
+            return true;
+        }
+    }
+
+    return false;
 }
diff --git a/block/backup.c b/block/backup.c
index 0198514043..15a2e55e8e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -181,8 +181,13 @@ static int coroutine_fn backup_before_write_notify(
         void *opaque)
 {
     BdrvTrackedRequest *req = opaque;
+    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
+    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
 
-    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
+    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
 }
 
 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/block/blkdebug.c b/block/blkdebug.c
index ebc5f13464..56c4cd084f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -186,6 +186,14 @@ static const char *event_names[BLKDBG_EVENT_MAX] = {
 
     [BLKDBG_FLUSH_TO_OS]                    = "flush_to_os",
     [BLKDBG_FLUSH_TO_DISK]                  = "flush_to_disk",
+
+    [BLKDBG_PWRITEV_RMW_HEAD]               = "pwritev_rmw.head",
+    [BLKDBG_PWRITEV_RMW_AFTER_HEAD]         = "pwritev_rmw.after_head",
+    [BLKDBG_PWRITEV_RMW_TAIL]               = "pwritev_rmw.tail",
+    [BLKDBG_PWRITEV_RMW_AFTER_TAIL]         = "pwritev_rmw.after_tail",
+    [BLKDBG_PWRITEV]                        = "pwritev",
+    [BLKDBG_PWRITEV_ZERO]                   = "pwritev_zero",
+    [BLKDBG_PWRITEV_DONE]                   = "pwritev_done",
 };
 
 static int get_event_by_name(const char *name, BlkDebugEvent *event)
@@ -271,19 +279,33 @@ static void remove_rule(BlkdebugRule *rule)
     g_free(rule);
 }
 
-static int read_config(BDRVBlkdebugState *s, const char *filename)
+static int read_config(BDRVBlkdebugState *s, const char *filename,
+                       QDict *options, Error **errp)
 {
-    FILE *f;
+    FILE *f = NULL;
     int ret;
     struct add_rule_data d;
+    Error *local_err = NULL;
+
+    if (filename) {
+        f = fopen(filename, "r");
+        if (f == NULL) {
+            error_setg_errno(errp, errno, "Could not read blkdebug config file");
+            return -errno;
+        }
 
-    f = fopen(filename, "r");
-    if (f == NULL) {
-        return -errno;
+        ret = qemu_config_parse(f, config_groups, filename);
+        if (ret < 0) {
+            error_setg(errp, "Could not parse blkdebug config file");
+            ret = -EINVAL;
+            goto fail;
+        }
     }
 
-    ret = qemu_config_parse(f, config_groups, filename);
-    if (ret < 0) {
+    qemu_config_parse_qdict(options, config_groups, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
         goto fail;
     }
 
@@ -298,7 +320,9 @@ static int read_config(BDRVBlkdebugState *s, const char *filename)
 fail:
     qemu_opts_reset(&inject_error_opts);
     qemu_opts_reset(&set_state_opts);
-    fclose(f);
+    if (f) {
+        fclose(f);
+    }
     return ret;
 }
 
@@ -310,7 +334,9 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
 
     /* Parse the blkdebug: prefix */
     if (!strstart(filename, "blkdebug:", &filename)) {
-        error_setg(errp, "File name string must start with 'blkdebug:'");
+        /* There was no prefix; therefore, all options have to be already
+           present in the QDict (except for the filename) */
+        qdict_put(options, "x-image", qstring_from_str(filename));
         return;
     }
 
@@ -346,6 +372,11 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "[internal use only, will be removed]",
         },
+        {
+            .name = "align",
+            .type = QEMU_OPT_SIZE,
+            .help = "Required alignment in bytes",
+        },
         { /* end of list */ }
     },
 };
@@ -356,7 +387,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVBlkdebugState *s = bs->opaque;
     QemuOpts *opts;
     Error *local_err = NULL;
-    const char *filename, *config;
+    const char *config;
+    uint64_t align;
     int ret;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -367,30 +399,31 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    /* Read rules from config file */
+    /* Read rules from config file or command line options */
     config = qemu_opt_get(opts, "config");
-    if (config) {
-        ret = read_config(s, config);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not read blkdebug config file");
-            goto fail;
-        }
+    ret = read_config(s, config, options, errp);
+    if (ret) {
+        goto fail;
     }
 
     /* Set initial state */
     s->state = 1;
 
     /* Open the backing file */
-    filename = qemu_opt_get(opts, "x-image");
-    if (filename == NULL) {
-        error_setg(errp, "Could not retrieve image file name");
-        ret = -EINVAL;
+    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
+                          flags, true, false, &local_err);
+    if (ret < 0) {
+        error_propagate(errp, local_err);
         goto fail;
     }
 
-    ret = bdrv_file_open(&bs->file, filename, NULL, flags, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
+    /* Set request alignment */
+    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
+    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
+        bs->request_alignment = align;
+    } else {
+        error_setg(errp, "Invalid alignment");
+        ret = -EINVAL;
         goto fail;
     }
 
diff --git a/block/blkverify.c b/block/blkverify.c
index 1c1637f55e..cfcbcf41c3 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -78,7 +78,9 @@ static void blkverify_parse_filename(const char *filename, QDict *options,
 
     /* Parse the blkverify: prefix */
     if (!strstart(filename, "blkverify:", &filename)) {
-        error_setg(errp, "File name string must start with 'blkverify:'");
+        /* There was no prefix; therefore, all options have to be already
+           present in the QDict (except for the filename) */
+        qdict_put(options, "x-image", qstring_from_str(filename));
         return;
     }
 
@@ -122,7 +124,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVBlkverifyState *s = bs->opaque;
     QemuOpts *opts;
     Error *local_err = NULL;
-    const char *filename, *raw;
     int ret;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
@@ -133,33 +134,19 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
-    /* Parse the raw image filename */
-    raw = qemu_opt_get(opts, "x-raw");
-    if (raw == NULL) {
-        error_setg(errp, "Could not retrieve raw image filename");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = bdrv_file_open(&bs->file, raw, NULL, flags, &local_err);
+    /* Open the raw file */
+    ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
+                          "raw", flags, true, false, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         goto fail;
     }
 
     /* Open the test file */
-    filename = qemu_opt_get(opts, "x-image");
-    if (filename == NULL) {
-        error_setg(errp, "Could not retrieve test image filename");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    s->test_file = bdrv_new("");
-    ret = bdrv_open(s->test_file, filename, NULL, flags, NULL, &local_err);
+    ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
+                          "test", flags, false, false, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
-        bdrv_unref(s->test_file);
         s->test_file = NULL;
         goto fail;
     }
@@ -417,7 +404,7 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_aio_writev        = blkverify_aio_writev,
     .bdrv_aio_flush         = blkverify_aio_flush,
 
-    .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden,
+    .authorizations         = { true, false },
 };
 
 static void bdrv_blkverify_init(void)
diff --git a/block/cow.c b/block/cow.c
index dc15e46b6c..7fc0b12163 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -351,7 +351,8 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,
         return ret;
     }
 
-    ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&cow_bs, filename, NULL, NULL, BDRV_O_RDWR,
+                         &local_err);
     if (ret < 0) {
         qerror_report_err(local_err);
         error_free(local_err);
diff --git a/block/curl.c b/block/curl.c
index a6039366da..a8075847b8 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -34,6 +34,11 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif
 
+#if LIBCURL_VERSION_NUM >= 0x071000
+/* The multi interface timer callback was introduced in 7.16.0 */
+#define NEED_CURL_TIMER_CALLBACK
+#endif
+
 #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
                    CURLPROTO_FTP | CURLPROTO_FTPS | \
                    CURLPROTO_TFTP)
@@ -77,6 +82,7 @@ typedef struct CURLState
 
 typedef struct BDRVCURLState {
     CURLM *multi;
+    QEMUTimer timer;
     size_t len;
     CURLState states[CURL_NUM_STATES];
     char *url;
@@ -87,6 +93,23 @@ typedef struct BDRVCURLState {
 static void curl_clean_state(CURLState *s);
 static void curl_multi_do(void *arg);
 
+#ifdef NEED_CURL_TIMER_CALLBACK
+static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
+{
+    BDRVCURLState *s = opaque;
+
+    DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms);
+    if (timeout_ms == -1) {
+        timer_del(&s->timer);
+    } else {
+        int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000;
+        timer_mod(&s->timer,
+                  qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns);
+    }
+    return 0;
+}
+#endif
+
 static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
                         void *s, void *sp)
 {
@@ -209,20 +232,10 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
     return FIND_RET_NONE;
 }
 
-static void curl_multi_do(void *arg)
+static void curl_multi_read(BDRVCURLState *s)
 {
-    BDRVCURLState *s = (BDRVCURLState *)arg;
-    int running;
-    int r;
     int msgs_in_queue;
 
-    if (!s->multi)
-        return;
-
-    do {
-        r = curl_multi_socket_all(s->multi, &running);
-    } while(r == CURLM_CALL_MULTI_PERFORM);
-
     /* Try to find done transfers, so we can free the easy
      * handle again. */
     do {
@@ -266,6 +279,41 @@ static void curl_multi_do(void *arg)
     } while(msgs_in_queue);
 }
 
+static void curl_multi_do(void *arg)
+{
+    BDRVCURLState *s = (BDRVCURLState *)arg;
+    int running;
+    int r;
+
+    if (!s->multi) {
+        return;
+    }
+
+    do {
+        r = curl_multi_socket_all(s->multi, &running);
+    } while(r == CURLM_CALL_MULTI_PERFORM);
+
+    curl_multi_read(s);
+}
+
+static void curl_multi_timeout_do(void *arg)
+{
+#ifdef NEED_CURL_TIMER_CALLBACK
+    BDRVCURLState *s = (BDRVCURLState *)arg;
+    int running;
+
+    if (!s->multi) {
+        return;
+    }
+
+    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
+
+    curl_multi_read(s);
+#else
+    abort();
+#endif
+}
+
 static CURLState *curl_init_state(BDRVCURLState *s)
 {
     CURLState *state = NULL;
@@ -473,12 +521,20 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     curl_easy_cleanup(state->curl);
     state->curl = NULL;
 
+    aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
+                   QEMU_CLOCK_REALTIME, SCALE_NS,
+                   curl_multi_timeout_do, s);
+
     // Now we know the file exists and its size, so let's
     // initialize the multi interface!
 
     s->multi = curl_multi_init();
     curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
     curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
     curl_multi_do(s);
 
     qemu_opts_del(opts);
@@ -597,6 +653,9 @@ static void curl_close(BlockDriverState *bs)
     }
     if (s->multi)
         curl_multi_cleanup(s->multi);
+
+    timer_del(&s->timer);
+
     g_free(s->url);
 }
 
diff --git a/block/gluster.c b/block/gluster.c
index 563d497dc2..a009b15ded 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -21,19 +21,15 @@
 #include "qemu/uri.h"
 
 typedef struct GlusterAIOCB {
-    BlockDriverAIOCB common;
     int64_t size;
     int ret;
-    bool *finished;
     QEMUBH *bh;
+    Coroutine *coroutine;
 } GlusterAIOCB;
 
 typedef struct BDRVGlusterState {
     struct glfs *glfs;
-    int fds[2];
     struct glfs_fd *fd;
-    int event_reader_pos;
-    GlusterAIOCB *event_acb;
 } BDRVGlusterState;
 
 #define GLUSTER_FD_READ  0
@@ -231,46 +227,32 @@ out:
     return NULL;
 }
 
-static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+static void qemu_gluster_complete_aio(void *opaque)
 {
-    int ret;
-    bool *finished = acb->finished;
-    BlockDriverCompletionFunc *cb = acb->common.cb;
-    void *opaque = acb->common.opaque;
-
-    if (!acb->ret || acb->ret == acb->size) {
-        ret = 0; /* Success */
-    } else if (acb->ret < 0) {
-        ret = acb->ret; /* Read/Write failed */
-    } else {
-        ret = -EIO; /* Partial read/write - fail it */
-    }
+    GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
 
-    qemu_aio_release(acb);
-    cb(opaque, ret);
-    if (finished) {
-        *finished = true;
-    }
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qemu_coroutine_enter(acb->coroutine, NULL);
 }
 
-static void qemu_gluster_aio_event_reader(void *opaque)
+/*
+ * AIO callback routine called from GlusterFS thread.
+ */
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
 {
-    BDRVGlusterState *s = opaque;
-    ssize_t ret;
-
-    do {
-        char *p = (char *)&s->event_acb;
-
-        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_acb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_acb)) {
-                s->event_reader_pos = 0;
-                qemu_gluster_complete_aio(s->event_acb, s);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+
+    if (!ret || ret == acb->size) {
+        acb->ret = 0; /* Success */
+    } else if (ret < 0) {
+        acb->ret = ret; /* Read/Write failed */
+    } else {
+        acb->ret = -EIO; /* Partial read/write - fail it */
+    }
+
+    acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+    qemu_bh_schedule(acb->bh);
 }
 
 /* TODO Convert to fine grained options */
@@ -309,7 +291,6 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
 
     filename = qemu_opt_get(opts, "filename");
 
-
     s->glfs = qemu_gluster_init(gconf, filename);
     if (!s->glfs) {
         ret = -errno;
@@ -329,18 +310,8 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     s->fd = glfs_open(s->glfs, gconf->image, open_flags);
     if (!s->fd) {
         ret = -errno;
-        goto out;
     }
 
-    ret = qemu_pipe(s->fds);
-    if (ret < 0) {
-        ret = -errno;
-        goto out;
-    }
-    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
-        qemu_gluster_aio_event_reader, NULL, s);
-
 out:
     qemu_opts_del(opts);
     qemu_gluster_gconf_free(gconf);
@@ -356,12 +327,65 @@ out:
     return ret;
 }
 
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    int ret;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+    BDRVGlusterState *s = bs->opaque;
+    off_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
+
+    acb->size = size;
+    acb->ret = 0;
+    acb->coroutine = qemu_coroutine_self();
+
+    ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
+
+out:
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
+}
+
+static inline bool gluster_supports_zerofill(void)
+{
+    return 1;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return glfs_zerofill(fd, offset, size);
+}
+
+#else
+static inline bool gluster_supports_zerofill(void)
+{
+    return 0;
+}
+
+static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
+        int64_t size)
+{
+    return 0;
+}
+#endif
+
 static int qemu_gluster_create(const char *filename,
         QEMUOptionParameter *options, Error **errp)
 {
     struct glfs *glfs;
     struct glfs_fd *fd;
     int ret = 0;
+    int prealloc = 0;
     int64_t total_size = 0;
     GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
 
@@ -374,6 +398,19 @@ static int qemu_gluster_create(const char *filename,
     while (options && options->name) {
         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
             total_size = options->value.n / BDRV_SECTOR_SIZE;
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "full") &&
+                    gluster_supports_zerofill()) {
+                prealloc = 1;
+            } else {
+                error_setg(errp, "Invalid preallocation mode: '%s'"
+                    " or GlusterFS doesn't support zerofill API",
+                           options->value.s);
+                ret = -EINVAL;
+                goto out;
+            }
         }
         options++;
     }
@@ -383,9 +420,15 @@ static int qemu_gluster_create(const char *filename,
     if (!fd) {
         ret = -errno;
     } else {
-        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+        if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) {
+            if (prealloc && qemu_gluster_zerofill(fd, 0,
+                    total_size * BDRV_SECTOR_SIZE)) {
+                ret = -errno;
+            }
+        } else {
             ret = -errno;
         }
+
         if (glfs_close(fd) != 0) {
             ret = -errno;
         }
@@ -398,58 +441,18 @@ out:
     return ret;
 }
 
-static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
-    bool finished = false;
-
-    acb->finished = &finished;
-    while (!finished) {
-        qemu_aio_wait();
-    }
-}
-
-static const AIOCBInfo gluster_aiocb_info = {
-    .aiocb_size = sizeof(GlusterAIOCB),
-    .cancel = qemu_gluster_aio_cancel,
-};
-
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
-{
-    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVGlusterState *s = bs->opaque;
-    int retval;
-
-    acb->ret = ret;
-    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
-    if (retval != sizeof(acb)) {
-        /*
-         * Gluster AIO callback thread failed to notify the waiting
-         * QEMU thread about IO completion.
-         */
-        error_report("Gluster AIO completion failed: %s", strerror(errno));
-        abort();
-    }
-}
-
-static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int write)
+static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
 {
     int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
     BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
-
-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
+    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
 
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
     acb->size = size;
     acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();
 
     if (write) {
         ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -460,13 +463,16 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
     }
 
     if (ret < 0) {
+        ret = -errno;
         goto out;
     }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
 
 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }
 
 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -482,71 +488,68 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
     return 0;
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
 {
-    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
 }
 
-static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
-        BlockDriverCompletionFunc *cb, void *opaque)
+static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
 {
     int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
     BDRVGlusterState *s = bs->opaque;
 
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
     acb->size = 0;
     acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();
 
     ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
     if (ret < 0) {
+        ret = -errno;
         goto out;
     }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
 
 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }
 
 #ifdef CONFIG_GLUSTERFS_DISCARD
-static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
-        void *opaque)
+static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors)
 {
     int ret;
-    GlusterAIOCB *acb;
+    GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
     BDRVGlusterState *s = bs->opaque;
-    size_t size;
-    off_t offset;
-
-    offset = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
+    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
+    off_t offset = sector_num * BDRV_SECTOR_SIZE;
 
-    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
     acb->size = 0;
     acb->ret = 0;
-    acb->finished = NULL;
+    acb->coroutine = qemu_coroutine_self();
 
     ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
     if (ret < 0) {
+        ret = -errno;
         goto out;
     }
-    return &acb->common;
+
+    qemu_coroutine_yield();
+    ret = acb->ret;
 
 out:
-    qemu_aio_release(acb);
-    return NULL;
+    g_slice_free(GlusterAIOCB, acb);
+    return ret;
 }
 #endif
 
@@ -581,10 +584,6 @@ static void qemu_gluster_close(BlockDriverState *bs)
 {
     BDRVGlusterState *s = bs->opaque;
 
-    close(s->fds[GLUSTER_FD_READ]);
-    close(s->fds[GLUSTER_FD_WRITE]);
-    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL);
-
     if (s->fd) {
         glfs_close(s->fd);
         s->fd = NULL;
@@ -604,6 +603,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = {
         .type = OPT_SIZE,
         .help = "Virtual disk size"
     },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, full)"
+    },
     { NULL }
 };
 
@@ -618,12 +622,15 @@ static BlockDriver bdrv_gluster = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
@@ -639,12 +646,15 @@ static BlockDriver bdrv_gluster_tcp = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
@@ -660,12 +670,15 @@ static BlockDriver bdrv_gluster_unix = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
@@ -681,12 +694,15 @@ static BlockDriver bdrv_gluster_rdma = {
     .bdrv_getlength               = qemu_gluster_getlength,
     .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
     .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_aio_readv               = qemu_gluster_aio_readv,
-    .bdrv_aio_writev              = qemu_gluster_aio_writev,
-    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .bdrv_co_readv                = qemu_gluster_co_readv,
+    .bdrv_co_writev               = qemu_gluster_co_writev,
+    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
     .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
 #ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_aio_discard             = qemu_gluster_aio_discard,
+    .bdrv_co_discard              = qemu_gluster_co_discard,
+#endif
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
 #endif
     .create_options               = qemu_gluster_create_options,
 };
diff --git a/block/iscsi.c b/block/iscsi.c
index c0ea0c4543..6f4af72a75 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -308,7 +308,7 @@ retry:
                                     iscsi_co_generic_cb, &iTask);
     if (iTask.task == NULL) {
         g_free(buf);
-        return -EIO;
+        return -ENOMEM;
     }
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
@@ -376,7 +376,7 @@ retry:
         break;
     }
     if (iTask.task == NULL) {
-        return -EIO;
+        return -ENOMEM;
     }
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
@@ -419,7 +419,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
 retry:
     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
-        return -EIO;
+        return -ENOMEM;
     }
 
     while (!iTask.complete) {
@@ -669,7 +669,7 @@ retry:
                                   sector_qemu2lun(sector_num, iscsilun),
                                   8 + 16, iscsi_co_generic_cb,
                                   &iTask) == NULL) {
-        ret = -EIO;
+        ret = -ENOMEM;
         goto out;
     }
 
@@ -753,7 +753,7 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
 retry:
     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
                      iscsi_co_generic_cb, &iTask) == NULL) {
-        return -EIO;
+        return -ENOMEM;
     }
 
     while (!iTask.complete) {
@@ -822,7 +822,7 @@ retry:
                                iscsilun->zeroblock, iscsilun->block_size,
                                nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
                                0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
-        return -EIO;
+        return -ENOMEM;
     }
 
     while (!iTask.complete) {
@@ -1217,6 +1217,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
         goto out;
     }
     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
+    bs->request_alignment = iscsilun->block_size;
 
     /* Medium changer or tape. We dont have any emulation for this so this must
      * be sg ioctl compatible. We force it to be sg, otherwise qemu will try
@@ -1265,23 +1266,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
                sizeof(struct scsi_inquiry_block_limits));
         scsi_free_scsi_task(task);
         task = NULL;
-
-        if (iscsilun->bl.max_unmap < 0xffffffff) {
-            bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
-                                                 iscsilun);
-        }
-        bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
-                                                   iscsilun);
-
-        if (iscsilun->bl.max_ws_len < 0xffffffff) {
-            bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
-                                                      iscsilun);
-        }
-        bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
-                                                        iscsilun);
-
-        bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
-                                                     iscsilun);
     }
 
 #if defined(LIBISCSI_FEATURE_NOP_COUNTER)
@@ -1326,6 +1310,41 @@ static void iscsi_close(BlockDriverState *bs)
     memset(iscsilun, 0, sizeof(IscsiLun));
 }
 
+static int iscsi_refresh_limits(BlockDriverState *bs)
+{
+    IscsiLun *iscsilun = bs->opaque;
+
+    /* We don't actually refresh here, but just return data queried in
+     * iscsi_open(): iscsi targets don't change their limits. */
+    if (iscsilun->lbp.lbpu || iscsilun->lbp.lbpws) {
+        if (iscsilun->bl.max_unmap < 0xffffffff) {
+            bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap,
+                                                 iscsilun);
+        }
+        bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+                                                   iscsilun);
+
+        if (iscsilun->bl.max_ws_len < 0xffffffff) {
+            bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len,
+                                                      iscsilun);
+        }
+        bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran,
+                                                        iscsilun);
+
+        bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
+                                                     iscsilun);
+    }
+    return 0;
+}
+
+/* We have nothing to do for iSCSI reopen, stub just returns
+ * success */
+static int iscsi_reopen_prepare(BDRVReopenState *state,
+                                BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
 {
     IscsiLun *iscsilun = bs->opaque;
@@ -1434,10 +1453,12 @@ static BlockDriver bdrv_iscsi = {
     .bdrv_close      = iscsi_close,
     .bdrv_create     = iscsi_create,
     .create_options  = iscsi_create_options,
+    .bdrv_reopen_prepare  = iscsi_reopen_prepare,
 
     .bdrv_getlength  = iscsi_getlength,
     .bdrv_get_info   = iscsi_get_info,
     .bdrv_truncate   = iscsi_truncate,
+    .bdrv_refresh_limits = iscsi_refresh_limits,
 
 #if defined(LIBISCSI_FEATURE_IOVECTOR)
     .bdrv_co_get_block_status = iscsi_co_get_block_status,
diff --git a/block/mirror.c b/block/mirror.c
index 2932bab27a..2a4333474e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -96,6 +96,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
         bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
     }
 
+    qemu_iovec_destroy(&op->qiov);
     g_slice_free(MirrorOp, op);
     qemu_coroutine_enter(s->common.co, NULL);
 }
@@ -630,11 +631,49 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
                          BlockDriverCompletionFunc *cb,
                          void *opaque, Error **errp)
 {
+    int64_t length, base_length;
+    int orig_base_flags;
+
+    orig_base_flags = bdrv_get_flags(base);
+
     if (bdrv_reopen(base, bs->open_flags, errp)) {
         return;
     }
+
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_setg(errp, "Unable to determine length of %s", bs->filename);
+        goto error_restore_flags;
+    }
+
+    base_length = bdrv_getlength(base);
+    if (base_length < 0) {
+        error_setg(errp, "Unable to determine length of %s", base->filename);
+        goto error_restore_flags;
+    }
+
+    if (length > base_length) {
+        if (bdrv_truncate(base, length) < 0) {
+            error_setg(errp, "Top image %s is larger than base image %s, and "
+                             "resize of base image failed",
+                             bs->filename, base->filename);
+            goto error_restore_flags;
+        }
+    }
+
     bdrv_ref(base);
     mirror_start_job(bs, base, speed, 0, 0,
                      on_error, on_error, cb, opaque, errp,
                      &commit_active_job_driver, false, base);
+    if (error_is_set(errp)) {
+        goto error_restore_flags;
+    }
+
+    return;
+
+error_restore_flags:
+    /* ignore error and errp for bdrv_reopen, because we want to propagate
+     * the original error */
+    bdrv_reopen(base, orig_base_flags, NULL);
+    return;
 }
diff --git a/block/qapi.c b/block/qapi.c
index a32cb79db8..8f4134b40a 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -29,6 +29,60 @@
 #include "qapi/qmp-output-visitor.h"
 #include "qapi/qmp/types.h"
 
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
+{
+    BlockDeviceInfo *info = g_malloc0(sizeof(*info));
+
+    info->file                   = g_strdup(bs->filename);
+    info->ro                     = bs->read_only;
+    info->drv                    = g_strdup(bs->drv->format_name);
+    info->encrypted              = bs->encrypted;
+    info->encryption_key_missing = bdrv_key_required(bs);
+
+    if (bs->node_name[0]) {
+        info->has_node_name = true;
+        info->node_name = g_strdup(bs->node_name);
+    }
+
+    if (bs->backing_file[0]) {
+        info->has_backing_file = true;
+        info->backing_file = g_strdup(bs->backing_file);
+    }
+
+    info->backing_file_depth = bdrv_get_backing_file_depth(bs);
+
+    if (bs->io_limits_enabled) {
+        ThrottleConfig cfg;
+        throttle_get_config(&bs->throttle_state, &cfg);
+        info->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
+        info->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
+        info->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
+
+        info->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
+        info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
+        info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
+
+        info->has_bps_max     = cfg.buckets[THROTTLE_BPS_TOTAL].max;
+        info->bps_max         = cfg.buckets[THROTTLE_BPS_TOTAL].max;
+        info->has_bps_rd_max  = cfg.buckets[THROTTLE_BPS_READ].max;
+        info->bps_rd_max      = cfg.buckets[THROTTLE_BPS_READ].max;
+        info->has_bps_wr_max  = cfg.buckets[THROTTLE_BPS_WRITE].max;
+        info->bps_wr_max      = cfg.buckets[THROTTLE_BPS_WRITE].max;
+
+        info->has_iops_max    = cfg.buckets[THROTTLE_OPS_TOTAL].max;
+        info->iops_max        = cfg.buckets[THROTTLE_OPS_TOTAL].max;
+        info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
+        info->iops_rd_max     = cfg.buckets[THROTTLE_OPS_READ].max;
+        info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
+        info->iops_wr_max     = cfg.buckets[THROTTLE_OPS_WRITE].max;
+
+        info->has_iops_size = cfg.op_size;
+        info->iops_size = cfg.op_size;
+    }
+
+    return info;
+}
+
 /*
  * Returns 0 on success, with *p_list either set to describe snapshot
  * information, or NULL because there are no snapshots.  Returns -errno on
@@ -211,60 +265,7 @@ void bdrv_query_info(BlockDriverState *bs,
 
     if (bs->drv) {
         info->has_inserted = true;
-        info->inserted = g_malloc0(sizeof(*info->inserted));
-        info->inserted->file = g_strdup(bs->filename);
-        info->inserted->ro = bs->read_only;
-        info->inserted->drv = g_strdup(bs->drv->format_name);
-        info->inserted->encrypted = bs->encrypted;
-        info->inserted->encryption_key_missing = bdrv_key_required(bs);
-
-        if (bs->backing_file[0]) {
-            info->inserted->has_backing_file = true;
-            info->inserted->backing_file = g_strdup(bs->backing_file);
-        }
-
-        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
-
-        if (bs->io_limits_enabled) {
-            ThrottleConfig cfg;
-            throttle_get_config(&bs->throttle_state, &cfg);
-            info->inserted->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
-            info->inserted->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
-            info->inserted->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
-
-            info->inserted->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
-            info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
-            info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
-
-            info->inserted->has_bps_max     =
-                cfg.buckets[THROTTLE_BPS_TOTAL].max;
-            info->inserted->bps_max         =
-                cfg.buckets[THROTTLE_BPS_TOTAL].max;
-            info->inserted->has_bps_rd_max  =
-                cfg.buckets[THROTTLE_BPS_READ].max;
-            info->inserted->bps_rd_max      =
-                cfg.buckets[THROTTLE_BPS_READ].max;
-            info->inserted->has_bps_wr_max  =
-                cfg.buckets[THROTTLE_BPS_WRITE].max;
-            info->inserted->bps_wr_max      =
-                cfg.buckets[THROTTLE_BPS_WRITE].max;
-
-            info->inserted->has_iops_max    =
-                cfg.buckets[THROTTLE_OPS_TOTAL].max;
-            info->inserted->iops_max        =
-                cfg.buckets[THROTTLE_OPS_TOTAL].max;
-            info->inserted->has_iops_rd_max =
-                cfg.buckets[THROTTLE_OPS_READ].max;
-            info->inserted->iops_rd_max     =
-                cfg.buckets[THROTTLE_OPS_READ].max;
-            info->inserted->has_iops_wr_max =
-                cfg.buckets[THROTTLE_OPS_WRITE].max;
-            info->inserted->iops_wr_max     =
-                cfg.buckets[THROTTLE_OPS_WRITE].max;
-
-            info->inserted->has_iops_size = cfg.op_size;
-            info->inserted->iops_size = cfg.op_size;
-        }
+        info->inserted = bdrv_block_device_info(bs);
 
         bs0 = bs;
         p_image_info = &info->inserted->image;
@@ -318,6 +319,11 @@ BlockStats *bdrv_query_stats(const BlockDriverState *bs)
         s->parent = bdrv_query_stats(bs->file);
     }
 
+    if (bs->backing_hd) {
+        s->has_backing = true;
+        s->backing = bdrv_query_stats(bs->backing_hd);
+    }
+
     return s;
 }
 
diff --git a/block/qcow.c b/block/qcow.c
index c470e05f60..948b0c5601 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -691,7 +691,8 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
         return ret;
     }
 
-    ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&qcow_bs, filename, NULL, NULL, BDRV_O_RDWR,
+                         &local_err);
     if (ret < 0) {
         qerror_report_err(local_err);
         error_free(local_err);
diff --git a/block/qcow2.c b/block/qcow2.c
index 8ec9db10f8..2da62b8a90 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -718,7 +718,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     qemu_opts_del(opts);
-    bs->bl.write_zeroes_alignment = s->cluster_sectors;
 
     if (s->use_lazy_refcounts && s->qcow_version < 3) {
         error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
@@ -751,6 +750,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     return ret;
 }
 
+static int qcow2_refresh_limits(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    bs->bl.write_zeroes_alignment = s->cluster_sectors;
+
+    return 0;
+}
+
 static int qcow2_set_key(BlockDriverState *bs, const char *key)
 {
     BDRVQcowState *s = bs->opaque;
@@ -1483,7 +1491,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         return ret;
     }
 
-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         return ret;
@@ -2268,6 +2276,7 @@ static BlockDriver bdrv_qcow2 = {
 
     .bdrv_change_backing_file   = qcow2_change_backing_file,
 
+    .bdrv_refresh_limits        = qcow2_refresh_limits,
     .bdrv_invalidate_cache      = qcow2_invalidate_cache,
 
     .create_options = qcow2_create_options,
diff --git a/block/qcow2.h b/block/qcow2.h
index 303eb26629..b5b7d13630 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -340,11 +340,11 @@ typedef enum QCow2MetadataOverlap {
 #define QCOW2_OL_ALL \
     (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
 
-#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
-#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
+#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
 #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
 
-#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
+#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
 
 static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
 {
diff --git a/block/qed.c b/block/qed.c
index 450a1fa2e9..694e6e2ee0 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -495,7 +495,6 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
     s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                             qed_need_check_timer_cb, s);
 
@@ -507,6 +506,15 @@ out:
     return ret;
 }
 
+static int bdrv_qed_refresh_limits(BlockDriverState *bs)
+{
+    BDRVQEDState *s = bs->opaque;
+
+    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
+
+    return 0;
+}
+
 /* We have nothing to do for QED reopen, stubs just return
  * success */
 static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
@@ -563,8 +571,8 @@ static int qed_create(const char *filename, uint32_t cluster_size,
         return ret;
     }
 
-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB,
-                         &local_err);
+    ret = bdrv_file_open(&bs, filename, NULL, NULL,
+                         BDRV_O_RDWR | BDRV_O_CACHE_WB, &local_err);
     if (ret < 0) {
         qerror_report_err(local_err);
         error_free(local_err);
@@ -1616,6 +1624,7 @@ static BlockDriver bdrv_qed = {
     .bdrv_truncate            = bdrv_qed_truncate,
     .bdrv_getlength           = bdrv_qed_getlength,
     .bdrv_get_info            = bdrv_qed_get_info,
+    .bdrv_refresh_limits      = bdrv_qed_refresh_limits,
     .bdrv_change_backing_file = bdrv_qed_change_backing_file,
     .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
     .bdrv_check               = bdrv_qed_check,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 0676037e13..126a634e45 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
     int fd;
     int type;
     int open_flags;
+    size_t buf_align;
+
 #if defined(__linux__)
     /* linux floppy specific */
     int64_t fd_open_time;
@@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
 }
 #endif
 
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    char *buf;
+    unsigned int sector_size;
+
+    /* For /dev/sg devices the alignment is not really used.
+       With buffered I/O, we don't have any restrictions. */
+    if (bs->sg || !(s->open_flags & O_DIRECT)) {
+        bs->request_alignment = 1;
+        s->buf_align = 1;
+        return;
+    }
+
+    /* Try a few ioctls to get the right size */
+    bs->request_alignment = 0;
+    s->buf_align = 0;
+
+#ifdef BLKSSZGET
+    if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+    if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef DIOCGSECTORSIZE
+    if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+        bs->request_alignment = sector_size;
+    }
+#endif
+#ifdef CONFIG_XFS
+    if (s->is_xfs) {
+        struct dioattr da;
+        if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+            bs->request_alignment = da.d_miniosz;
+            /* The kernel returns wrong information for d_mem */
+            /* s->buf_align = da.d_mem; */
+        }
+    }
+#endif
+
+    /* If we could not get the sizes so far, we can only guess them */
+    if (!s->buf_align) {
+        size_t align;
+        buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+            if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+                s->buf_align = align;
+                break;
+            }
+        }
+        qemu_vfree(buf);
+    }
+
+    if (!bs->request_alignment) {
+        size_t align;
+        buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+        for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+            if (pread(s->fd, buf, align, 0) >= 0) {
+                bs->request_alignment = align;
+                break;
+            }
+        }
+        qemu_vfree(buf);
+    }
+}
+
 static void raw_parse_flags(int bdrv_flags, int *open_flags)
 {
     assert(open_flags != NULL);
@@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     return ret;
 }
 
-
 static void raw_reopen_commit(BDRVReopenState *state)
 {
     BDRVRawReopenState *raw_s = state->opaque;
@@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
     state->opaque = NULL;
 }
 
+static int raw_refresh_limits(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
 
-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
-        {
-            unsigned int sectorsize = 512;
-            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
-                sectorsize > bufsize)
-                bufsize = sectorsize;
-        }
-#endif
-#ifdef CONFIG_COCOA
-        uint32_t blockSize = 512;
-        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
-            bufsize = blockSize;
-        }
-#endif
-*/
+    raw_probe_alignment(bs);
+    bs->bl.opt_mem_alignment = s->buf_align;
+
+    return 0;
+}
 
 static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
 {
@@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = {
     .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_flush = raw_aio_flush,
     .bdrv_aio_discard = raw_aio_discard,
+    .bdrv_refresh_limits = raw_refresh_limits,
 
     .bdrv_truncate = raw_truncate,
     .bdrv_getlength = raw_getlength,
@@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = {
     .bdrv_aio_writev	= raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
     .bdrv_aio_discard   = hdev_aio_discard,
+    .bdrv_refresh_limits = raw_refresh_limits,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength	= raw_getlength,
@@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = {
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength      = raw_getlength,
@@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength      = raw_getlength,
@@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_aio_readv     = raw_aio_readv,
     .bdrv_aio_writev    = raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_refresh_limits = raw_refresh_limits,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength      = raw_getlength,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index ce314fd54f..beb7f2395e 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
 				 NULL, 0, NULL, 0, &returned, NULL);
 }
 
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+    DISK_GEOMETRY_EX dg;
+    BOOL status;
+
+    if (s->type == FTYPE_CD) {
+        bs->request_alignment = 2048;
+        return;
+    }
+    if (s->type == FTYPE_HARDDISK) {
+        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
+        if (status != 0) {
+            bs->request_alignment = dg.Geometry.BytesPerSector;
+            return;
+        }
+        /* try GetDiskFreeSpace too */
+    }
+
+    if (s->drive_path[0]) {
+        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
+                         &dg.Geometry.BytesPerSector,
+                         &freeClusters, &totalClusters);
+        bs->request_alignment = dg.Geometry.BytesPerSector;
+    }
+}
+
 static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
 {
     assert(access_flags != NULL);
@@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
+    if (filename[0] && filename[1] == ':') {
+        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+    } else if (filename[0] == '\\' && filename[1] == '\\') {
+        s->drive_path[0] = 0;
+    } else {
+        /* Relative path.  */
+        char buf[MAX_PATH];
+        GetCurrentDirectory(MAX_PATH, buf);
+        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+    }
+
     s->hfile = CreateFile(filename, access_flags,
                           FILE_SHARE_READ, NULL,
                           OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
         s->aio = aio;
     }
 
+    raw_probe_alignment(bs);
     ret = 0;
 fail:
     qemu_opts_del(opts);
diff --git a/block/rbd.c b/block/rbd.c
index f453f04757..121fae221e 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -95,18 +95,13 @@ typedef struct RADOSCB {
 #define RBD_FD_WRITE 1
 
 typedef struct BDRVRBDState {
-    int fds[2];
     rados_t cluster;
     rados_ioctx_t io_ctx;
     rbd_image_t image;
     char name[RBD_MAX_IMAGE_NAME_SIZE];
     char *snap;
-    int event_reader_pos;
-    RADOSCB *event_rcb;
 } BDRVRBDState;
 
-static void rbd_aio_bh_cb(void *opaque);
-
 static int qemu_rbd_next_tok(char *dst, int dst_len,
                              char *src, char delim,
                              const char *name,
@@ -369,9 +364,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
 }
 
 /*
- * This aio completion is being called from qemu_rbd_aio_event_reader()
- * and runs in qemu context. It schedules a bh, but just in case the aio
- * was not cancelled before.
+ * This aio completion is being called from rbd_finish_bh() and runs in qemu
+ * BH context.
  */
 static void qemu_rbd_complete_aio(RADOSCB *rcb)
 {
@@ -401,36 +395,19 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
             acb->ret = r;
         }
     }
-    /* Note that acb->bh can be NULL in case where the aio was cancelled */
-    acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
-    g_free(rcb);
-}
 
-/*
- * aio fd read handler. It runs in the qemu context and calls the
- * completion handling of completed rados aio operations.
- */
-static void qemu_rbd_aio_event_reader(void *opaque)
-{
-    BDRVRBDState *s = opaque;
+    g_free(rcb);
 
-    ssize_t ret;
+    if (acb->cmd == RBD_AIO_READ) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    }
+    qemu_vfree(acb->bounce);
+    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+    acb->status = 0;
 
-    do {
-        char *p = (char *)&s->event_rcb;
-
-        /* now read the rcb pointer that was sent from a non qemu thread */
-        ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
-                   sizeof(s->event_rcb) - s->event_reader_pos);
-        if (ret > 0) {
-            s->event_reader_pos += ret;
-            if (s->event_reader_pos == sizeof(s->event_rcb)) {
-                s->event_reader_pos = 0;
-                qemu_rbd_complete_aio(s->event_rcb);
-            }
-        }
-    } while (ret < 0 && errno == EINTR);
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }
 
 /* TODO Convert to fine grained options */
@@ -538,23 +515,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 
     bs->read_only = (s->snap != NULL);
 
-    s->event_reader_pos = 0;
-    r = qemu_pipe(s->fds);
-    if (r < 0) {
-        error_report("error opening eventfd");
-        goto failed;
-    }
-    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
-    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
-                            NULL, s);
-
-
     qemu_opts_del(opts);
     return 0;
 
-failed:
-    rbd_close(s->image);
 failed_open:
     rados_ioctx_destroy(s->io_ctx);
 failed_shutdown:
@@ -569,10 +532,6 @@ static void qemu_rbd_close(BlockDriverState *bs)
 {
     BDRVRBDState *s = bs->opaque;
 
-    close(s->fds[0]);
-    close(s->fds[1]);
-    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL);
-
     rbd_close(s->image);
     rados_ioctx_destroy(s->io_ctx);
     g_free(s->snap);
@@ -600,34 +559,11 @@ static const AIOCBInfo rbd_aiocb_info = {
     .cancel = qemu_rbd_aio_cancel,
 };
 
-static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
+static void rbd_finish_bh(void *opaque)
 {
-    int ret = 0;
-    while (1) {
-        fd_set wfd;
-        int fd = s->fds[RBD_FD_WRITE];
-
-        /* send the op pointer to the qemu thread that is responsible
-           for the aio/op completion. Must do it in a qemu thread context */
-        ret = write(fd, (void *)&rcb, sizeof(rcb));
-        if (ret >= 0) {
-            break;
-        }
-        if (errno == EINTR) {
-            continue;
-        }
-        if (errno != EAGAIN) {
-            break;
-        }
-
-        FD_ZERO(&wfd);
-        FD_SET(fd, &wfd);
-        do {
-            ret = select(fd + 1, NULL, &wfd, NULL, NULL);
-        } while (ret < 0 && errno == EINTR);
-    }
-
-    return ret;
+    RADOSCB *rcb = opaque;
+    qemu_bh_delete(rcb->acb->bh);
+    qemu_rbd_complete_aio(rcb);
 }
 
 /*
@@ -635,40 +571,18 @@ static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
  *
  * Note: this function is being called from a non qemu thread so
  * we need to be careful about what we do here. Generally we only
- * write to the block notification pipe, and do the rest of the
- * io completion handling from qemu_rbd_aio_event_reader() which
- * runs in a qemu context.
+ * schedule a BH, and do the rest of the io completion handling
+ * from rbd_finish_bh() which runs in a qemu context.
  */
 static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
 {
-    int ret;
+    RBDAIOCB *acb = rcb->acb;
+
     rcb->ret = rbd_aio_get_return_value(c);
     rbd_aio_release(c);
-    ret = qemu_rbd_send_pipe(rcb->s, rcb);
-    if (ret < 0) {
-        error_report("failed writing to acb->s->fds");
-        g_free(rcb);
-    }
-}
-
-/* Callback when all queued rbd_aio requests are complete */
 
-static void rbd_aio_bh_cb(void *opaque)
-{
-    RBDAIOCB *acb = opaque;
-
-    if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-    }
-    qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    acb->status = 0;
-
-    if (!acb->cancelled) {
-        qemu_aio_release(acb);
-    }
+    acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+    qemu_bh_schedule(acb->bh);
 }
 
 static int rbd_aio_discard_wrapper(rbd_image_t image,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index b94ab6e10a..672b9c97a2 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -161,7 +161,7 @@ typedef struct SheepdogVdiReq {
     uint32_t id;
     uint32_t data_length;
     uint64_t vdi_size;
-    uint32_t vdi_id;
+    uint32_t base_vdi_id;
     uint8_t copies;
     uint8_t copy_policy;
     uint8_t reserved[2];
@@ -1493,7 +1493,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
 
     memset(&hdr, 0, sizeof(hdr));
     hdr.opcode = SD_OP_NEW_VDI;
-    hdr.vdi_id = s->inode.vdi_id;
+    hdr.base_vdi_id = s->inode.vdi_id;
 
     wlen = SD_MAX_VDI_LEN;
 
@@ -1534,7 +1534,7 @@ static int sd_prealloc(const char *filename)
     Error *local_err = NULL;
     int ret;
 
-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err);
     if (ret < 0) {
         qerror_report_err(local_err);
         error_free(local_err);
@@ -1684,7 +1684,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
 
     if (backing_file) {
         BlockDriverState *bs;
-        BDRVSheepdogState *s;
+        BDRVSheepdogState *base;
         BlockDriver *drv;
 
         /* Currently, only Sheepdog backing image is supported. */
@@ -1695,22 +1695,22 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
             goto out;
         }
 
-        ret = bdrv_file_open(&bs, backing_file, NULL, 0, &local_err);
+        ret = bdrv_file_open(&bs, backing_file, NULL, NULL, 0, &local_err);
         if (ret < 0) {
             qerror_report_err(local_err);
             error_free(local_err);
             goto out;
         }
 
-        s = bs->opaque;
+        base = bs->opaque;
 
-        if (!is_snapshot(&s->inode)) {
+        if (!is_snapshot(&base->inode)) {
             error_report("cannot clone from a non snapshot vdi");
             bdrv_unref(bs);
             ret = -EINVAL;
             goto out;
         }
-
+        s->inode.vdi_id = base->inode.vdi_id;
         bdrv_unref(bs);
     }
 
@@ -1743,7 +1743,7 @@ static void sd_close(BlockDriverState *bs)
     memset(&hdr, 0, sizeof(hdr));
 
     hdr.opcode = SD_OP_RELEASE_VDI;
-    hdr.vdi_id = s->inode.vdi_id;
+    hdr.base_vdi_id = s->inode.vdi_id;
     wlen = strlen(s->name) + 1;
     hdr.data_length = wlen;
     hdr.flags = SD_FLAG_CMD_WRITE;
@@ -1846,7 +1846,7 @@ static bool sd_delete(BDRVSheepdogState *s)
     unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
     SheepdogVdiReq hdr = {
         .opcode = SD_OP_DEL_VDI,
-        .vdi_id = s->inode.vdi_id,
+        .base_vdi_id = s->inode.vdi_id,
         .data_length = wlen,
         .flags = SD_FLAG_CMD_WRITE,
     };
@@ -2442,11 +2442,12 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
 {
     BDRVSheepdogState *s = bs->opaque;
     SheepdogInode *inode = &s->inode;
-    unsigned long start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE,
+    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
+    unsigned long start = offset / SD_DATA_OBJ_SIZE,
                   end = DIV_ROUND_UP((sector_num + nb_sectors) *
                                      BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
     unsigned long idx;
-    int64_t ret = BDRV_BLOCK_DATA;
+    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
 
     for (idx = start; idx < end; idx++) {
         if (inode->data_vdi_id[idx] == 0) {
diff --git a/block/stream.c b/block/stream.c
index 46bec7d379..dd0b4ac3d2 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -75,6 +75,8 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
         unused->backing_hd = NULL;
         bdrv_unref(unused);
     }
+
+    bdrv_refresh_limits(top);
 }
 
 static void coroutine_fn stream_run(void *opaque)
diff --git a/block/vhdx.c b/block/vhdx.c
index 1995778945..9ee0a612ff 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1797,7 +1797,7 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
         goto exit;
     }
 
-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         goto exit;
diff --git a/block/vmdk.c b/block/vmdk.c
index c6b60b4a91..99ca60fdb9 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -428,10 +428,6 @@ static int vmdk_add_extent(BlockDriverState *bs,
     extent->l2_size = l2_size;
     extent->cluster_sectors = flat ? sectors : cluster_sectors;
 
-    if (!flat) {
-        bs->bl.write_zeroes_alignment =
-            MAX(bs->bl.write_zeroes_alignment, cluster_sectors);
-    }
     if (s->num_extents > 1) {
         extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
     } else {
@@ -640,6 +636,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
         l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
     }
+    if (bdrv_getlength(file) <
+            le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
+        error_report("File truncated, expecting at least %lld bytes",
+                le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE);
+        return -EINVAL;
+    }
+
     ret = vmdk_add_extent(bs, file, false,
                           le64_to_cpu(header.capacity),
                           le64_to_cpu(header.gd_offset) << 9,
@@ -654,6 +657,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     }
     extent->compressed =
         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
+    if (extent->compressed) {
+        g_free(s->create_type);
+        s->create_type = g_strdup("streamOptimized");
+    }
     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
     extent->version = le32_to_cpu(header.version);
     extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
@@ -769,8 +776,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
 
         path_combine(extent_path, sizeof(extent_path),
                 desc_file_path, fname);
-        ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags,
-                             errp);
+        ret = bdrv_file_open(&extent_file, extent_path, NULL, NULL,
+                             bs->open_flags, errp);
         if (ret) {
             return ret;
         }
@@ -891,6 +898,23 @@ fail:
     return ret;
 }
 
+
+static int vmdk_refresh_limits(BlockDriverState *bs)
+{
+    BDRVVmdkState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_extents; i++) {
+        if (!s->extents[i].flat) {
+            bs->bl.write_zeroes_alignment =
+                MAX(bs->bl.write_zeroes_alignment,
+                    s->extents[i].cluster_sectors);
+        }
+    }
+
+    return 0;
+}
+
 static int get_whole_cluster(BlockDriverState *bs,
                 VmdkExtent *extent,
                 uint64_t cluster_offset,
@@ -1325,8 +1349,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
-    int n, ret;
-    int64_t index_in_cluster;
+    int ret;
+    int64_t index_in_cluster, n;
     uint64_t extent_begin_sector, extent_relative_sector_num;
     uint64_t cluster_offset;
     VmdkMetaData m_data;
@@ -1469,7 +1493,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
         goto exit;
     }
 
-    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         goto exit;
@@ -1807,7 +1831,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
             goto exit;
         }
     }
-    ret = bdrv_file_open(&new_bs, filename, NULL, BDRV_O_RDWR, &local_err);
+    ret = bdrv_file_open(&new_bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write description");
         goto exit;
@@ -2002,6 +2026,7 @@ static BlockDriver bdrv_vmdk = {
     .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
     .bdrv_has_zero_init           = vmdk_has_zero_init,
     .bdrv_get_specific_info       = vmdk_get_specific_info,
+    .bdrv_refresh_limits          = vmdk_refresh_limits,
 
     .create_options               = vmdk_create_options,
 };
diff --git a/blockdev.c b/blockdev.c
index e457494342..36ceece9ff 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -307,12 +307,11 @@ static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
 
 /* Takes the ownership of bs_opts */
-static DriveInfo *blockdev_init(QDict *bs_opts,
+static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
                                 BlockInterfaceType type,
                                 Error **errp)
 {
     const char *buf;
-    const char *file = NULL;
     const char *serial;
     int ro = 0;
     int bdrv_flags = 0;
@@ -354,7 +353,6 @@ static DriveInfo *blockdev_init(QDict *bs_opts,
     ro = qemu_opt_get_bool(opts, "read-only", 0);
     copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
 
-    file = qemu_opt_get(opts, "file");
     serial = qemu_opt_get(opts, "serial");
 
     if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
@@ -599,6 +597,10 @@ QemuOptsList qemu_legacy_drive_opts = {
             .name = "addr",
             .type = QEMU_OPT_STRING,
             .help = "pci address (virtio only)",
+        },{
+            .name = "file",
+            .type = QEMU_OPT_STRING,
+            .help = "file name",
         },
 
         /* Options that are passed on, but have special semantics with -drive */
@@ -629,6 +631,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
     const char *devaddr;
     bool read_only = false;
     bool copy_on_read;
+    const char *filename;
     Error *local_err = NULL;
 
     /* Change legacy command line options into QMP ones */
@@ -867,8 +870,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
         }
     }
 
+    filename = qemu_opt_get(legacy_opts, "file");
+
     /* Actual block device init: Functionality shared with blockdev-add */
-    dinfo = blockdev_init(bs_opts, type, &local_err);
+    dinfo = blockdev_init(filename, bs_opts, type, &local_err);
     if (dinfo == NULL) {
         if (error_is_set(&local_err)) {
             qerror_report_err(local_err);
@@ -942,14 +947,22 @@ static void blockdev_do_action(int kind, void *data, Error **errp)
     qmp_transaction(&list, errp);
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
+                                bool has_node_name, const char *node_name,
+                                const char *snapshot_file,
+                                bool has_snapshot_node_name,
+                                const char *snapshot_node_name,
                                 bool has_format, const char *format,
-                                bool has_mode, enum NewImageMode mode,
-                                Error **errp)
+                                bool has_mode, NewImageMode mode, Error **errp)
 {
     BlockdevSnapshot snapshot = {
+        .has_device = has_device,
         .device = (char *) device,
+        .has_node_name = has_node_name,
+        .node_name = (char *) node_name,
         .snapshot_file = (char *) snapshot_file,
+        .has_snapshot_node_name = has_snapshot_node_name,
+        .snapshot_node_name = (char *) snapshot_node_name,
         .has_format = has_format,
         .format = (char *) format,
         .has_mode = has_mode,
@@ -1187,8 +1200,14 @@ static void external_snapshot_prepare(BlkTransactionState *common,
 {
     BlockDriver *drv;
     int flags, ret;
+    QDict *options = NULL;
     Error *local_err = NULL;
+    bool has_device = false;
     const char *device;
+    bool has_node_name = false;
+    const char *node_name;
+    bool has_snapshot_node_name = false;
+    const char *snapshot_node_name;
     const char *new_image_file;
     const char *format = "qcow2";
     enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1199,7 +1218,14 @@ static void external_snapshot_prepare(BlkTransactionState *common,
     /* get parameters */
     g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);
 
+    has_device = action->blockdev_snapshot_sync->has_device;
     device = action->blockdev_snapshot_sync->device;
+    has_node_name = action->blockdev_snapshot_sync->has_node_name;
+    node_name = action->blockdev_snapshot_sync->node_name;
+    has_snapshot_node_name =
+        action->blockdev_snapshot_sync->has_snapshot_node_name;
+    snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name;
+
     new_image_file = action->blockdev_snapshot_sync->snapshot_file;
     if (action->blockdev_snapshot_sync->has_format) {
         format = action->blockdev_snapshot_sync->format;
@@ -1215,9 +1241,21 @@ static void external_snapshot_prepare(BlkTransactionState *common,
         return;
     }
 
-    state->old_bs = bdrv_find(device);
-    if (!state->old_bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
+                                   has_node_name ? node_name : NULL,
+                                   &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (has_node_name && !has_snapshot_node_name) {
+        error_setg(errp, "New snapshot node name missing");
+        return;
+    }
+
+    if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) {
+        error_setg(errp, "New snapshot node name already existing");
         return;
     }
 
@@ -1238,7 +1276,7 @@ static void external_snapshot_prepare(BlkTransactionState *common,
         }
     }
 
-    if (bdrv_check_ext_snapshot(state->old_bs) != EXT_SNAPSHOT_ALLOWED) {
+    if (!bdrv_is_first_non_filter(state->old_bs)) {
         error_set(errp, QERR_FEATURE_DISABLED, "snapshot");
         return;
     }
@@ -1257,15 +1295,23 @@ static void external_snapshot_prepare(BlkTransactionState *common,
         }
     }
 
+    if (has_snapshot_node_name) {
+        options = qdict_new();
+        qdict_put(options, "node-name",
+                  qstring_from_str(snapshot_node_name));
+    }
+
     /* We will manually add the backing_hd field to the bs later */
     state->new_bs = bdrv_new("");
     /* TODO Inherit bs->options or only take explicit options with an
      * extended QMP command? */
-    ret = bdrv_open(state->new_bs, new_image_file, NULL,
+    ret = bdrv_open(state->new_bs, new_image_file, options,
                     flags | BDRV_O_NO_BACKING, drv, &local_err);
     if (ret != 0) {
         error_propagate(errp, local_err);
     }
+
+    QDECREF(options);
 }
 
 static void external_snapshot_commit(BlkTransactionState *common)
@@ -1476,14 +1522,19 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
     eject_device(bs, force, errp);
 }
 
-void qmp_block_passwd(const char *device, const char *password, Error **errp)
+void qmp_block_passwd(bool has_device, const char *device,
+                      bool has_node_name, const char *node_name,
+                      const char *password, Error **errp)
 {
+    Error *local_err = NULL;
     BlockDriverState *bs;
     int err;
 
-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    bs = bdrv_lookup_bs(has_device ? device : NULL,
+                        has_node_name ? node_name : NULL,
+                        &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
         return;
     }
 
@@ -1673,14 +1724,24 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
     return 0;
 }
 
-void qmp_block_resize(const char *device, int64_t size, Error **errp)
+void qmp_block_resize(bool has_device, const char *device,
+                      bool has_node_name, const char *node_name,
+                      int64_t size, Error **errp)
 {
+    Error *local_err = NULL;
     BlockDriverState *bs;
     int ret;
 
-    bs = bdrv_find(device);
-    if (!bs) {
-        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+    bs = bdrv_lookup_bs(has_device ? device : NULL,
+                        has_node_name ? node_name : NULL,
+                        &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (!bdrv_is_first_non_filter(bs)) {
+        error_set(errp, QERR_FEATURE_DISABLED, "resize");
         return;
     }
 
@@ -1947,6 +2008,11 @@ void qmp_drive_backup(const char *device, const char *target,
     }
 }
 
+BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
+{
+    return bdrv_named_nodes_list();
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
 
 void qmp_drive_mirror(const char *device, const char *target,
@@ -2210,7 +2276,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 
     qdict_flatten(qdict);
 
-    blockdev_init(qdict, IF_NONE, &local_err);
+    blockdev_init(NULL, qdict, IF_NONE, &local_err);
     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
         goto fail;
@@ -2251,10 +2317,6 @@ QemuOptsList qemu_common_drive_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "enable/disable snapshot mode",
         },{
-            .name = "file",
-            .type = QEMU_OPT_STRING,
-            .help = "disk image",
-        },{
             .name = "discard",
             .type = QEMU_OPT_STRING,
             .help = "discard operation (ignore/off, unmap/on)",
diff --git a/configure b/configure
index 3782a6a26a..b472694cb2 100755
--- a/configure
+++ b/configure
@@ -256,6 +256,7 @@ coroutine_pool=""
 seccomp=""
 glusterfs=""
 glusterfs_discard="no"
+glusterfs_zerofill="no"
 virtio_blk_data_plane=""
 gtk=""
 gtkabi="2.0"
@@ -2701,6 +2702,9 @@ if test "$glusterfs" != "no" ; then
     if $pkg_config --atleast-version=5 glusterfs-api; then
       glusterfs_discard="yes"
     fi
+    if $pkg_config --atleast-version=6 glusterfs-api; then
+      glusterfs_zerofill="yes"
+    fi
   else
     if test "$glusterfs" = "yes" ; then
       feature_not_found "GlusterFS backend support"
@@ -4229,6 +4233,10 @@ if test "$glusterfs_discard" = "yes" ; then
   echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
 fi
 
+if test "$glusterfs_zerofill" = "yes" ; then
+  echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
+fi
+
 if test "$libssh2" = "yes" ; then
   echo "CONFIG_LIBSSH2=y" >> $config_host_mak
 fi
diff --git a/hmp-commands.hx b/hmp-commands.hx
index feca0847d0..f3fc514427 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -35,6 +35,11 @@ STEXI
 @item commit
 @findex commit
 Commit changes to the disk images (if -snapshot is used) or backing files.
+If the backing file is smaller than the snapshot, then the backing file will be
+resized to be the same size as the snapshot.  If the snapshot is smaller than
+the backing file, the backing file will not be truncated.  If you want the
+backing file to match the size of the smaller snapshot, you can safely truncate
+it yourself once the commit operation successfully completes.
 ETEXI
 
     {
diff --git a/hmp.c b/hmp.c
index 79f9c7d2ce..1af0809305 100644
--- a/hmp.c
+++ b/hmp.c
@@ -871,7 +871,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict)
     const char *password = qdict_get_str(qdict, "password");
     Error *errp = NULL;
 
-    qmp_block_passwd(device, password, &errp);
+    qmp_block_passwd(true, device, false, NULL, password, &errp);
     hmp_handle_error(mon, &errp);
 }
 
@@ -893,7 +893,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
     int64_t size = qdict_get_int(qdict, "size");
     Error *errp = NULL;
 
-    qmp_block_resize(device, size, &errp);
+    qmp_block_resize(true, device, false, NULL, size, &errp);
     hmp_handle_error(mon, &errp);
 }
 
@@ -972,7 +972,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
     }
 
     mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-    qmp_blockdev_snapshot_sync(device, filename, !!format, format,
+    qmp_blockdev_snapshot_sync(true, device, false, NULL,
+                               filename, false, NULL,
+                               !!format, format,
                                true, mode, &errp);
     hmp_handle_error(mon, &errp);
 }
@@ -1092,11 +1094,11 @@ void hmp_eject(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &err);
 }
 
-static void hmp_change_read_arg(Monitor *mon, const char *password,
-                                void *opaque)
+static void hmp_change_read_arg(void *opaque, const char *password,
+                                void *readline_opaque)
 {
     qmp_change_vnc_password(password, NULL);
-    monitor_read_command(mon, 1);
+    monitor_read_command(opaque, 1);
 }
 
 void hmp_change(Monitor *mon, const QDict *qdict)
diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c
index 07a43bfe89..986f2a9c92 100644
--- a/hw/audio/hda-codec.c
+++ b/hw/audio/hda-codec.c
@@ -559,6 +559,21 @@ static int hda_audio_post_load(void *opaque, int version)
     return 0;
 }
 
+static void hda_audio_reset(DeviceState *dev)
+{
+    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda.qdev, dev);
+    HDAAudioStream *st;
+    int i;
+
+    dprint(a, 1, "%s\n", __func__);
+    for (i = 0; i < ARRAY_SIZE(a->st); i++) {
+        st = a->st + i;
+        if (st->node != NULL) {
+            hda_audio_set_running(st, false);
+        }
+    }
+}
+
 static const VMStateDescription vmstate_hda_audio_stream = {
     .name = "hda-audio-stream",
     .version_id = 1,
@@ -640,6 +655,7 @@ static void hda_audio_output_class_init(ObjectClass *klass, void *data)
     k->stream = hda_audio_stream;
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, output-only (line-out)";
+    dc->reset = hda_audio_reset;
     dc->vmsd = &vmstate_hda_audio;
     dc->props = hda_audio_properties;
 }
@@ -662,6 +678,7 @@ static void hda_audio_duplex_class_init(ObjectClass *klass, void *data)
     k->stream = hda_audio_stream;
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, duplex (line-out, line-in)";
+    dc->reset = hda_audio_reset;
     dc->vmsd = &vmstate_hda_audio;
     dc->props = hda_audio_properties;
 }
@@ -684,6 +701,7 @@ static void hda_audio_micro_class_init(ObjectClass *klass, void *data)
     k->stream = hda_audio_stream;
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, duplex (speaker, microphone)";
+    dc->reset = hda_audio_reset;
     dc->vmsd = &vmstate_hda_audio;
     dc->props = hda_audio_properties;
 }
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 19d0961a47..8a568e5edb 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -731,7 +731,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     register_savevm(dev, "virtio-blk", virtio_blk_id++, 2,
                     virtio_blk_save, virtio_blk_load, s);
     bdrv_set_dev_ops(s->bs, &virtio_block_ops, s);
-    bdrv_set_buffer_alignment(s->bs, s->conf->logical_block_size);
+    bdrv_set_guest_block_size(s->bs, s->conf->logical_block_size);
 
     bdrv_iostatus_enable(s->bs);
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 276641436e..a327d71fb1 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -393,6 +393,10 @@ static QEMUMachine pc_i440fx_machine_v1_7 = {
     PC_I440FX_1_7_MACHINE_OPTIONS,
     .name = "pc-i440fx-1.7",
     .init = pc_init_pci_1_7,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_7,
+        { /* end of list */ }
+    },
 };
 
 #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
diff --git a/hw/ide/core.c b/hw/ide/core.c
index e1f4c33fb8..036cd4a6d1 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2103,7 +2103,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
     s->smart_selftest_count = 0;
     if (kind == IDE_CD) {
         bdrv_set_dev_ops(bs, &ide_cd_block_ops, s);
-        bdrv_set_buffer_alignment(bs, 2048);
+        bdrv_set_guest_block_size(bs, 2048);
     } else {
         if (!bdrv_is_inserted(s->bs)) {
             error_report("Device needs media, but drive is empty");
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 3496c0bbd8..50b89ad4aa 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -469,6 +469,8 @@ static int32_t scsi_target_send_command(SCSIRequest *req, uint8_t *buf)
             r->req.dev->sense_is_ua = false;
         }
         break;
+    case TEST_UNIT_READY:
+        break;
     default:
         scsi_req_build_sense(req, SENSE_CODE(LUN_NOT_SUPPORTED));
         scsi_req_complete(req, CHECK_CONDITION);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index bce617cb93..a8d0f15ebe 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2254,7 +2254,7 @@ static int scsi_initfn(SCSIDevice *dev)
     } else {
         bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_block_ops, s);
     }
-    bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize);
+    bdrv_set_guest_block_size(s->qdev.conf.bs, s->qdev.blocksize);
 
     bdrv_iostatus_enable(s->qdev.conf.bs);
     add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, NULL);
@@ -2306,6 +2306,7 @@ static const SCSIReqOps scsi_disk_emulate_reqops = {
     .send_command = scsi_disk_emulate_command,
     .read_data    = scsi_disk_emulate_read_data,
     .write_data   = scsi_disk_emulate_write_data,
+    .cancel_io    = scsi_cancel_io,
     .get_buf      = scsi_get_buf,
 };
 
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 8f195bec00..f08b64e177 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -210,7 +210,7 @@ static void scsi_read_complete(void * opaque, int ret)
             s->blocksize = ldl_be_p(&r->buf[8]);
             s->max_lba = ldq_be_p(&r->buf[0]);
         }
-        bdrv_set_buffer_alignment(s->conf.bs, s->blocksize);
+        bdrv_set_guest_block_size(s->conf.bs, s->blocksize);
 
         scsi_req_data(&r->req, len);
         if (!r->req.io_canceled) {
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 6dcdd1b91c..6610b3aab3 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -306,6 +306,10 @@ static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status,
     VirtIOSCSIReq *req = r->hba_private;
     uint32_t sense_len;
 
+    if (r->io_canceled) {
+        return;
+    }
+
     req->resp.cmd->response = VIRTIO_SCSI_S_OK;
     req->resp.cmd->status = status;
     if (req->resp.cmd->status == GOOD) {
@@ -516,7 +520,7 @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
     evt->event = event;
     evt->reason = reason;
     if (!dev) {
-        assert(event == VIRTIO_SCSI_T_NO_EVENT);
+        assert(event == VIRTIO_SCSI_T_EVENTS_MISSED);
     } else {
         evt->lun[0] = 1;
         evt->lun[1] = dev->id;
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index a3eac3e5c1..97b457541f 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -1,5 +1,5 @@
 # usb subsystem core
-common-obj-y += core.o combined-packet.o bus.o desc.o
+common-obj-y += core.o combined-packet.o bus.o desc.o desc-msos.o
 common-obj-y += libhw.o
 
 # usb host adapters
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index 09848c6320..fe70429304 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -16,6 +16,8 @@ static Property usb_props[] = {
     DEFINE_PROP_STRING("serial", USBDevice, serial),
     DEFINE_PROP_BIT("full-path", USBDevice, flags,
                     USB_DEV_FLAG_FULL_PATH, true),
+    DEFINE_PROP_BIT("msos-desc", USBDevice, flags,
+                    USB_DEV_FLAG_MSOS_DESC_ENABLE, true),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/hw/usb/desc-msos.c b/hw/usb/desc-msos.c
new file mode 100644
index 0000000000..ed8d62cab8
--- /dev/null
+++ b/hw/usb/desc-msos.c
@@ -0,0 +1,234 @@
+#include "hw/usb.h"
+#include "hw/usb/desc.h"
+
+/*
+ * Microsoft OS Descriptors
+ *
+ * Windows tries to fetch some special descriptors with informations
+ * specifically for windows.  Presence is indicated using a special
+ * string @ index 0xee.  There are two kinds of descriptors:
+ *
+ * compatid descriptor
+ *   Used to bind drivers, if usb class isn't specific enougth.
+ *   Used for PTP/MTP for example (both share the same usb class).
+ *
+ * properties descriptor
+ *   Does carry registry entries.  They show up in
+ *   HLM\SYSTEM\CurrentControlSet\Enum\USB\<devid>\<serial>\Device Parameters
+ *
+ * Note that Windows caches the stuff it got in the registry, so when
+ * playing with this you have to delete registry subtrees to make
+ * windows query the device again:
+ *   HLM\SYSTEM\CurrentControlSet\Control\usbflags
+ *   HLM\SYSTEM\CurrentControlSet\Enum\USB
+ * Windows will complain it can't delete entries on the second one.
+ * It has deleted everything it had permissions too, which is enouth
+ * as this includes "Device Parameters".
+ *
+ * http://msdn.microsoft.com/en-us/library/windows/hardware/ff537430.aspx
+ *
+ */
+
+/* ------------------------------------------------------------------ */
+
+typedef struct msos_compat_hdr {
+    uint32_t dwLength;
+    uint8_t  bcdVersion_lo;
+    uint8_t  bcdVersion_hi;
+    uint8_t  wIndex_lo;
+    uint8_t  wIndex_hi;
+    uint8_t  bCount;
+    uint8_t  reserved[7];
+} QEMU_PACKED msos_compat_hdr;
+
+typedef struct msos_compat_func {
+    uint8_t  bFirstInterfaceNumber;
+    uint8_t  reserved_1;
+    uint8_t  compatibleId[8];
+    uint8_t  subCompatibleId[8];
+    uint8_t  reserved_2[6];
+} QEMU_PACKED msos_compat_func;
+
+static int usb_desc_msos_compat(const USBDesc *desc, uint8_t *dest)
+{
+    msos_compat_hdr *hdr = (void *)dest;
+    msos_compat_func *func;
+    int length = sizeof(*hdr);
+    int count = 0;
+
+    func = (void *)(dest + length);
+    func->bFirstInterfaceNumber = 0;
+    func->reserved_1 = 0x01;
+    length += sizeof(*func);
+    count++;
+
+    hdr->dwLength      = cpu_to_le32(length);
+    hdr->bcdVersion_lo = 0x00;
+    hdr->bcdVersion_hi = 0x01;
+    hdr->wIndex_lo     = 0x04;
+    hdr->wIndex_hi     = 0x00;
+    hdr->bCount        = count;
+    return length;
+}
+
+/* ------------------------------------------------------------------ */
+
+typedef struct msos_prop_hdr {
+    uint32_t dwLength;
+    uint8_t  bcdVersion_lo;
+    uint8_t  bcdVersion_hi;
+    uint8_t  wIndex_lo;
+    uint8_t  wIndex_hi;
+    uint8_t  wCount_lo;
+    uint8_t  wCount_hi;
+} QEMU_PACKED msos_prop_hdr;
+
+typedef struct msos_prop {
+    uint32_t dwLength;
+    uint32_t dwPropertyDataType;
+    uint8_t  dwPropertyNameLength_lo;
+    uint8_t  dwPropertyNameLength_hi;
+    uint8_t  bPropertyName[];
+} QEMU_PACKED msos_prop;
+
+typedef struct msos_prop_data {
+    uint32_t dwPropertyDataLength;
+    uint8_t  bPropertyData[];
+} QEMU_PACKED msos_prop_data;
+
+typedef enum msos_prop_type {
+    MSOS_REG_SZ        = 1,
+    MSOS_REG_EXPAND_SZ = 2,
+    MSOS_REG_BINARY    = 3,
+    MSOS_REG_DWORD_LE  = 4,
+    MSOS_REG_DWORD_BE  = 5,
+    MSOS_REG_LINK      = 6,
+    MSOS_REG_MULTI_SZ  = 7,
+} msos_prop_type;
+
+static int usb_desc_msos_prop_name(struct msos_prop *prop,
+                                   const wchar_t *name)
+{
+    int length = wcslen(name) + 1;
+    int i;
+
+    prop->dwPropertyNameLength_lo = usb_lo(length*2);
+    prop->dwPropertyNameLength_hi = usb_hi(length*2);
+    for (i = 0; i < length; i++) {
+        prop->bPropertyName[i*2]   = usb_lo(name[i]);
+        prop->bPropertyName[i*2+1] = usb_hi(name[i]);
+    }
+    return length*2;
+}
+
+static int usb_desc_msos_prop_str(uint8_t *dest, msos_prop_type type,
+                                  const wchar_t *name, const wchar_t *value)
+{
+    struct msos_prop *prop = (void *)dest;
+    struct msos_prop_data *data;
+    int length = sizeof(*prop);
+    int i, vlen = wcslen(value) + 1;
+
+    prop->dwPropertyDataType = cpu_to_le32(type);
+    length += usb_desc_msos_prop_name(prop, name);
+    data = (void *)(dest + length);
+
+    data->dwPropertyDataLength = cpu_to_le32(vlen*2);
+    length += sizeof(*prop);
+
+    for (i = 0; i < vlen; i++) {
+        data->bPropertyData[i*2]   = usb_lo(value[i]);
+        data->bPropertyData[i*2+1] = usb_hi(value[i]);
+    }
+    length += vlen*2;
+
+    prop->dwLength = cpu_to_le32(length);
+    return length;
+}
+
+static int usb_desc_msos_prop_dword(uint8_t *dest, const wchar_t *name,
+                                    uint32_t value)
+{
+    struct msos_prop *prop = (void *)dest;
+    struct msos_prop_data *data;
+    int length = sizeof(*prop);
+
+    prop->dwPropertyDataType = cpu_to_le32(MSOS_REG_DWORD_LE);
+    length += usb_desc_msos_prop_name(prop, name);
+    data = (void *)(dest + length);
+
+    data->dwPropertyDataLength = cpu_to_le32(4);
+    data->bPropertyData[0] = (value)       & 0xff;
+    data->bPropertyData[1] = (value >>  8) & 0xff;
+    data->bPropertyData[2] = (value >> 16) & 0xff;
+    data->bPropertyData[3] = (value >> 24) & 0xff;
+    length += sizeof(*prop) + 4;
+
+    prop->dwLength = cpu_to_le32(length);
+    return length;
+}
+
+static int usb_desc_msos_prop(const USBDesc *desc, uint8_t *dest)
+{
+    msos_prop_hdr *hdr = (void *)dest;
+    int length = sizeof(*hdr);
+    int count = 0;
+
+    if (desc->msos->Label) {
+        /*
+         * Given as example in the specs.  Havn't figured yet where
+         * this label shows up in the windows gui.
+         */
+        length += usb_desc_msos_prop_str(dest+length, MSOS_REG_SZ,
+                                         L"Label", desc->msos->Label);
+        count++;
+    }
+
+    if (desc->msos->SelectiveSuspendEnabled) {
+        /*
+         * Signaling remote wakeup capability in the standard usb
+         * descriptors isn't enouth to make windows actually use it.
+         * This is the "Yes, we really mean it" registy entry to flip
+         * the switch in the windows drivers.
+         */
+        length += usb_desc_msos_prop_dword(dest+length,
+                                           L"SelectiveSuspendEnabled", 1);
+        count++;
+    }
+
+    hdr->dwLength      = cpu_to_le32(length);
+    hdr->bcdVersion_lo = 0x00;
+    hdr->bcdVersion_hi = 0x01;
+    hdr->wIndex_lo     = 0x05;
+    hdr->wIndex_hi     = 0x00;
+    hdr->wCount_lo     = usb_lo(count);
+    hdr->wCount_hi     = usb_hi(count);
+    return length;
+}
+
+/* ------------------------------------------------------------------ */
+
+int usb_desc_msos(const USBDesc *desc,  USBPacket *p,
+                  int index, uint8_t *dest, size_t len)
+{
+    void *buf = g_malloc0(4096);
+    int length = 0;
+
+    switch (index) {
+    case 0x0004:
+        length = usb_desc_msos_compat(desc, buf);
+        break;
+    case 0x0005:
+        length = usb_desc_msos_prop(desc, buf);
+        break;
+    }
+
+    if (length > len) {
+        length = len;
+    }
+    memcpy(dest, buf, length);
+    free(buf);
+
+    p->actual_length = length;
+    return 0;
+}
diff --git a/hw/usb/desc.c b/hw/usb/desc.c
index f18a043500..f133ddb9db 100644
--- a/hw/usb/desc.c
+++ b/hw/usb/desc.c
@@ -7,7 +7,7 @@
 /* ------------------------------------------------------------------ */
 
 int usb_desc_device(const USBDescID *id, const USBDescDevice *dev,
-                    uint8_t *dest, size_t len)
+                    bool msos, uint8_t *dest, size_t len)
 {
     uint8_t bLength = 0x12;
     USBDescriptor *d = (void *)dest;
@@ -19,8 +19,18 @@ int usb_desc_device(const USBDescID *id, const USBDescDevice *dev,
     d->bLength                     = bLength;
     d->bDescriptorType             = USB_DT_DEVICE;
 
-    d->u.device.bcdUSB_lo          = usb_lo(dev->bcdUSB);
-    d->u.device.bcdUSB_hi          = usb_hi(dev->bcdUSB);
+    if (msos && dev->bcdUSB < 0x0200) {
+        /*
+         * Version 2.0+ required for microsoft os descriptors to work.
+         * Done this way so msos-desc compat property will handle both
+         * the version and the new descriptors being present.
+         */
+        d->u.device.bcdUSB_lo          = usb_lo(0x0200);
+        d->u.device.bcdUSB_hi          = usb_hi(0x0200);
+    } else {
+        d->u.device.bcdUSB_lo          = usb_lo(dev->bcdUSB);
+        d->u.device.bcdUSB_hi          = usb_hi(dev->bcdUSB);
+    }
     d->u.device.bDeviceClass       = dev->bDeviceClass;
     d->u.device.bDeviceSubClass    = dev->bDeviceSubClass;
     d->u.device.bDeviceProtocol    = dev->bDeviceProtocol;
@@ -499,6 +509,10 @@ void usb_desc_init(USBDevice *dev)
     if (desc->super) {
         dev->speedmask |= USB_SPEED_MASK_SUPER;
     }
+    if (desc->msos && (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_ENABLE))) {
+        dev->flags |= (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE);
+        usb_desc_set_string(dev, 0xee, "MSFT100Q");
+    }
     usb_desc_setdefaults(dev);
 }
 
@@ -626,6 +640,7 @@ int usb_desc_string(USBDevice *dev, int index, uint8_t *dest, size_t len)
 int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p,
                             int value, uint8_t *dest, size_t len)
 {
+    bool msos = (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE));
     const USBDesc *desc = usb_device_get_usb_desc(dev);
     const USBDescDevice *other_dev;
     uint8_t buf[256];
@@ -646,7 +661,7 @@ int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p,
 
     switch(type) {
     case USB_DT_DEVICE:
-        ret = usb_desc_device(&desc->id, dev->device, buf, sizeof(buf));
+        ret = usb_desc_device(&desc->id, dev->device, msos, buf, sizeof(buf));
         trace_usb_desc_device(dev->addr, len, ret);
         break;
     case USB_DT_CONFIG:
@@ -703,6 +718,7 @@ int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p,
 int usb_desc_handle_control(USBDevice *dev, USBPacket *p,
         int request, int value, int index, int length, uint8_t *data)
 {
+    bool msos = (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE));
     const USBDesc *desc = usb_device_get_usb_desc(dev);
     int ret = -1;
 
@@ -782,6 +798,19 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p,
         trace_usb_set_interface(dev->addr, index, value, ret);
         break;
 
+    case VendorDeviceRequest | 'Q':
+        if (msos) {
+            ret = usb_desc_msos(desc, p, index, data, length);
+            trace_usb_desc_msos(dev->addr, index, length, ret);
+        }
+        break;
+    case VendorInterfaceRequest | 'Q':
+        if (msos) {
+            ret = usb_desc_msos(desc, p, index, data, length);
+            trace_usb_desc_msos(dev->addr, index, length, ret);
+        }
+        break;
+
     }
     return ret;
 }
diff --git a/hw/usb/desc.h b/hw/usb/desc.h
index 81327b0e74..2b4fcdae76 100644
--- a/hw/usb/desc.h
+++ b/hw/usb/desc.h
@@ -2,6 +2,7 @@
 #define QEMU_HW_USB_DESC_H
 
 #include <inttypes.h>
+#include <wchar.h>
 
 /* binary representation */
 typedef struct USBDescriptor {
@@ -182,6 +183,11 @@ struct USBDescOther {
     const uint8_t             *data;
 };
 
+struct USBDescMSOS {
+    const wchar_t             *Label;
+    bool                      SelectiveSuspendEnabled;
+};
+
 typedef const char *USBDescStrings[256];
 
 struct USBDesc {
@@ -190,6 +196,7 @@ struct USBDesc {
     const USBDescDevice       *high;
     const USBDescDevice       *super;
     const char* const         *str;
+    const USBDescMSOS         *msos;
 };
 
 #define USB_DESC_FLAG_SUPER (1 << 1)
@@ -207,7 +214,7 @@ static inline uint8_t usb_hi(uint16_t val)
 
 /* generate usb packages from structs */
 int usb_desc_device(const USBDescID *id, const USBDescDevice *dev,
-                    uint8_t *dest, size_t len);
+                    bool msos, uint8_t *dest, size_t len);
 int usb_desc_device_qualifier(const USBDescDevice *dev,
                               uint8_t *dest, size_t len);
 int usb_desc_config(const USBDescConfig *conf, int flags,
@@ -219,6 +226,8 @@ int usb_desc_iface(const USBDescIface *iface, int flags,
 int usb_desc_endpoint(const USBDescEndpoint *ep, int flags,
                       uint8_t *dest, size_t len);
 int usb_desc_other(const USBDescOther *desc, uint8_t *dest, size_t len);
+int usb_desc_msos(const USBDesc *desc, USBPacket *p,
+                  int index, uint8_t *dest, size_t len);
 
 /* control message emulation helpers */
 void usb_desc_init(USBDevice *dev);
diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c
index 5e667f0199..2966066682 100644
--- a/hw/usb/dev-hid.c
+++ b/hw/usb/dev-hid.c
@@ -261,6 +261,10 @@ static const USBDescDevice desc_device_keyboard = {
     },
 };
 
+static const USBDescMSOS desc_msos_suspend = {
+    .SelectiveSuspendEnabled = true,
+};
+
 static const USBDesc desc_mouse = {
     .id = {
         .idVendor          = 0x0627,
@@ -272,6 +276,7 @@ static const USBDesc desc_mouse = {
     },
     .full = &desc_device_mouse,
     .str  = desc_strings,
+    .msos = &desc_msos_suspend,
 };
 
 static const USBDesc desc_tablet = {
@@ -285,6 +290,7 @@ static const USBDesc desc_tablet = {
     },
     .full = &desc_device_tablet,
     .str  = desc_strings,
+    .msos = &desc_msos_suspend,
 };
 
 static const USBDesc desc_tablet2 = {
@@ -299,6 +305,7 @@ static const USBDesc desc_tablet2 = {
     .full = &desc_device_tablet,
     .high = &desc_device_tablet2,
     .str  = desc_strings,
+    .msos = &desc_msos_suspend,
 };
 
 static const USBDesc desc_keyboard = {
@@ -312,6 +319,7 @@ static const USBDesc desc_keyboard = {
     },
     .full = &desc_device_keyboard,
     .str  = desc_strings,
+    .msos = &desc_msos_suspend,
 };
 
 static const uint8_t qemu_mouse_hid_report_descriptor[] = {
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 250d45ec3d..665a1ffcb3 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -376,7 +376,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
         barrier();
 
         if (desc.flags & VRING_DESC_F_INDIRECT) {
-            int ret = get_indirect(vring, elem, &desc);
+            ret = get_indirect(vring, elem, &desc);
             if (ret < 0) {
                 goto out;
             }
diff --git a/include/block/block.h b/include/block/block.h
index 36efaeac2d..963a61fa4c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -184,7 +184,11 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_parse_discard_flags(const char *mode, int *flags);
 int bdrv_file_open(BlockDriverState **pbs, const char *filename,
-                   QDict *options, int flags, Error **errp);
+                   const char *reference, QDict *options, int flags,
+                   Error **errp);
+int bdrv_open_image(BlockDriverState **pbs, const char *filename,
+                    QDict *options, const char *bdref_key, int flags,
+                    bool force_raw, bool allow_none, Error **errp);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
 int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
               int flags, BlockDriver *drv, Error **errp);
@@ -220,7 +224,6 @@ BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, int64_t sector_num
                                         int nb_sectors, BdrvRequestFlags flags,
                                         BlockDriverCompletionFunc *cb, void *opaque);
 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
-int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
                void *buf, int count);
 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
@@ -249,6 +252,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset);
 int64_t bdrv_getlength(BlockDriverState *bs);
 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int bdrv_refresh_limits(BlockDriverState *bs);
 int bdrv_commit(BlockDriverState *bs);
 int bdrv_commit_all(void);
 int bdrv_change_backing_file(BlockDriverState *bs,
@@ -283,16 +287,16 @@ int bdrv_amend_options(BlockDriverState *bs_new, QEMUOptionParameter *options);
 /* external snapshots */
 
 typedef enum {
-    EXT_SNAPSHOT_ALLOWED,
-    EXT_SNAPSHOT_FORBIDDEN,
-} ExtSnapshotPerm;
+    BS_IS_A_FILTER,
+    BS_FILTER_PASS_DOWN,
+    BS_AUTHORIZATION_COUNT,
+} BsAuthorization;
 
-/* return EXT_SNAPSHOT_ALLOWED if external snapshot is allowed
- * return EXT_SNAPSHOT_FORBIDDEN if external snapshot is forbidden
- */
-ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs);
-/* helper used to forbid external snapshots like in blkverify */
-ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs);
+bool bdrv_generic_is_first_non_filter(BlockDriverState *bs,
+                                      BlockDriverState *candidate);
+bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
+                                      BlockDriverState *candidate);
+bool bdrv_is_first_non_filter(BlockDriverState *candidate);
 
 /* async block I/O */
 typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
@@ -374,6 +378,11 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
 const char *bdrv_get_format_name(BlockDriverState *bs);
 BlockDriverState *bdrv_find(const char *name);
+BlockDriverState *bdrv_find_node(const char *node_name);
+BlockDeviceInfoList *bdrv_named_nodes_list(void);
+BlockDriverState *bdrv_lookup_bs(const char *device,
+                                 const char *node_name,
+                                 Error **errp);
 BlockDriverState *bdrv_next(BlockDriverState *bs);
 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
                   void *opaque);
@@ -418,7 +427,10 @@ void bdrv_img_create(const char *filename, const char *fmt,
                      char *options, uint64_t img_size, int flags,
                      Error **errp, bool quiet);
 
-void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
+/* Returns the alignment in bytes that is required so that no bounce buffer
+ * is required throughout the stack */
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
+void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
@@ -515,6 +527,14 @@ typedef enum {
     BLKDBG_FLUSH_TO_OS,
     BLKDBG_FLUSH_TO_DISK,
 
+    BLKDBG_PWRITEV_RMW_HEAD,
+    BLKDBG_PWRITEV_RMW_AFTER_HEAD,
+    BLKDBG_PWRITEV_RMW_TAIL,
+    BLKDBG_PWRITEV_RMW_AFTER_TAIL,
+    BLKDBG_PWRITEV,
+    BLKDBG_PWRITEV_ZERO,
+    BLKDBG_PWRITEV_DONE,
+
     BLKDBG_EVENT_MAX,
 } BlkDebugEvent;
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2772f2f1bd..0bcf1c9b8c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -57,22 +57,35 @@
 
 typedef struct BdrvTrackedRequest {
     BlockDriverState *bs;
-    int64_t sector_num;
-    int nb_sectors;
+    int64_t offset;
+    unsigned int bytes;
     bool is_write;
+
+    bool serialising;
+    int64_t overlap_offset;
+    unsigned int overlap_bytes;
+
     QLIST_ENTRY(BdrvTrackedRequest) list;
     Coroutine *co; /* owner, used for deadlock detection */
     CoQueue wait_queue; /* coroutines blocked on this request */
+
+    struct BdrvTrackedRequest *waiting_for;
 } BdrvTrackedRequest;
 
 struct BlockDriver {
     const char *format_name;
     int instance_size;
 
-    /* if not defined external snapshots are allowed
-     * future block filters will query their children to build the response
+    /* this table of boolean contains authorizations for the block operations */
+    bool authorizations[BS_AUTHORIZATION_COUNT];
+    /* for snapshots complex block filter like Quorum can implement the
+     * following recursive callback instead of BS_IS_A_FILTER.
+     * It's purpose is to recurse on the filter children while calling
+     * bdrv_recurse_is_first_non_filter on them.
+     * For a sample implementation look in the future Quorum block filter.
      */
-    ExtSnapshotPerm (*bdrv_check_ext_snapshot)(BlockDriverState *bs);
+    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
+                                             BlockDriverState *candidate);
 
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
@@ -226,6 +239,8 @@ struct BlockDriver {
     int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
     bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
 
+    int (*bdrv_refresh_limits)(BlockDriverState *bs);
+
     /*
      * Returns 1 if newly created images are guaranteed to contain only
      * zeros, 0 otherwise.
@@ -250,6 +265,9 @@ typedef struct BlockLimits {
 
     /* optimal transfer length in sectors */
     int opt_transfer_length;
+
+    /* memory alignment so that no bounce buffer is needed */
+    size_t opt_mem_alignment;
 } BlockLimits;
 
 /*
@@ -291,8 +309,8 @@ struct BlockDriverState {
     /* Callback before write request is processed */
     NotifierWithReturnList before_write_notifiers;
 
-    /* number of in-flight copy-on-read requests */
-    unsigned int copy_on_read_in_flight;
+    /* number of in-flight serialising requests */
+    unsigned int serialising_in_flight;
 
     /* I/O throttling */
     ThrottleState throttle_state;
@@ -314,8 +332,11 @@ struct BlockDriverState {
     /* Whether produces zeros when read beyond eof */
     bool zero_beyond_eof;
 
-    /* the memory alignment required for the buffers handled by this driver */
-    int buffer_alignment;
+    /* Alignment requirement for offset/length of I/O requests */
+    unsigned int request_alignment;
+
+    /* the block size for which the guest device expects atomicity */
+    int guest_block_size;
 
     /* do we need to tell the quest if we have a volatile write cache? */
     int enable_write_cache;
@@ -325,11 +346,18 @@ struct BlockDriverState {
     BlockdevOnError on_read_error, on_write_error;
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
+
+    /* the following member gives a name to every node on the bs graph. */
+    char node_name[32];
+    /* element of the list of named nodes building the graph */
+    QTAILQ_ENTRY(BlockDriverState) node_list;
+    /* Device name is the name associated with the "drive" the guest sees */
     char device_name[32];
+    /* element of the list of "drives" the guest sees */
+    QTAILQ_ENTRY(BlockDriverState) device_list;
     QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
     int refcnt;
     int in_use; /* users other than guest access, eg. block migration */
-    QTAILQ_ENTRY(BlockDriverState) list;
 
     QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
 
diff --git a/include/block/qapi.h b/include/block/qapi.h
index 9518ee4001..e92c00daf6 100644
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -29,6 +29,7 @@
 #include "block/block.h"
 #include "block/snapshot.h"
 
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs);
 int bdrv_query_snapshot_info_list(BlockDriverState *bs,
                                   SnapshotInfoList **p_list,
                                   Error **errp);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index eb3da964f0..7fe2bd17f6 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -241,6 +241,7 @@ uint16_t pvpanic_port(void);
 int e820_add_entry(uint64_t, uint64_t, uint32_t);
 
 #define PC_Q35_COMPAT_1_7 \
+        PC_COMPAT_1_7, \
         {\
             .driver   = "hpet",\
             .property = HPET_INTCAP,\
@@ -259,7 +260,15 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
         PC_COMPAT_1_4, \
         PC_Q35_COMPAT_1_5
 
+#define PC_COMPAT_1_7 \
+        {\
+            .driver   = TYPE_USB_DEVICE,\
+            .property = "msos-desc",\
+            .value    = "no",\
+        }
+
 #define PC_COMPAT_1_6 \
+        PC_COMPAT_1_7, \
         {\
             .driver   = "e1000",\
             .property = "mitigation",\
diff --git a/include/hw/usb.h b/include/hw/usb.h
index 2a3ea0c92e..3ef7af7413 100644
--- a/include/hw/usb.h
+++ b/include/hw/usb.h
@@ -182,6 +182,7 @@ typedef struct USBDescIface USBDescIface;
 typedef struct USBDescEndpoint USBDescEndpoint;
 typedef struct USBDescOther USBDescOther;
 typedef struct USBDescString USBDescString;
+typedef struct USBDescMSOS USBDescMSOS;
 
 struct USBDescString {
     uint8_t index;
@@ -208,6 +209,8 @@ struct USBEndpoint {
 enum USBDeviceFlags {
     USB_DEV_FLAG_FULL_PATH,
     USB_DEV_FLAG_IS_HOST,
+    USB_DEV_FLAG_MSOS_DESC_ENABLE,
+    USB_DEV_FLAG_MSOS_DESC_IN_USE,
 };
 
 /* definition of a USB device */
diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 22d8b8f3e0..7e5f752b7a 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -5,7 +5,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/block.h"
-#include "monitor/readline.h"
+#include "qemu/readline.h"
 
 extern Monitor *cur_mon;
 extern Monitor *default_mon;
diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h
index 5cefd8022a..1ddf97b1c3 100644
--- a/include/qapi/qmp/qdict.h
+++ b/include/qapi/qmp/qdict.h
@@ -68,5 +68,6 @@ QDict *qdict_clone_shallow(const QDict *src);
 void qdict_flatten(QDict *qdict);
 
 void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start);
+void qdict_array_split(QDict *src, QList **dst);
 
 #endif /* QDICT_H */
diff --git a/include/qemu-io.h b/include/qemu-io.h
index a418b46a40..7e7c07c09b 100644
--- a/include/qemu-io.h
+++ b/include/qemu-io.h
@@ -42,5 +42,8 @@ bool qemuio_command(BlockDriverState *bs, const char *cmd);
 
 void qemuio_add_command(const cmdinfo_t *ci);
 int qemuio_command_usage(const cmdinfo_t *ci);
+void qemuio_complete_command(const char *input,
+                             void (*fn)(const char *cmd, void *opaque),
+                             void *opaque);
 
 #endif /* QEMU_IO_H */
diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h
index 508428ff32..dbd97c4bdb 100644
--- a/include/qemu/config-file.h
+++ b/include/qemu/config-file.h
@@ -4,6 +4,7 @@
 #include <stdio.h>
 #include "qemu/option.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
 
 QemuOptsList *qemu_find_opts(const char *group);
 QemuOptsList *qemu_find_opts_err(const char *group, Error **errp);
@@ -18,6 +19,11 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname);
 
 int qemu_read_config_file(const char *filename);
 
+/* Parse QDict options as a replacement for a config file (allowing multiple
+   enumerated (0..(n-1)) configuration "sections") */
+void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+                             Error **errp);
+
 /* Read default QEMU config files
  */
 int qemu_read_default_config_files(bool userconfig);
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index b3e2b6d8ea..eac7172bcb 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -240,4 +240,6 @@ static inline void qemu_init_auxval(char **envp) { }
 void qemu_init_auxval(char **envp);
 #endif
 
+void qemu_set_tty_echo(int fd, bool echo);
+
 #endif
diff --git a/include/monitor/readline.h b/include/qemu/readline.h
index 0faf6e1db7..a89fe4a9a9 100644
--- a/include/monitor/readline.h
+++ b/include/qemu/readline.h
@@ -1,14 +1,15 @@
 #ifndef READLINE_H
 #define READLINE_H
 
-#include "qemu-common.h"
-
 #define READLINE_CMD_BUF_SIZE 4095
 #define READLINE_MAX_CMDS 64
 #define READLINE_MAX_COMPLETIONS 256
 
-typedef void ReadLineFunc(Monitor *mon, const char *str, void *opaque);
-typedef void ReadLineCompletionFunc(Monitor *mon,
+typedef void ReadLinePrintfFunc(void *opaque, const char *fmt, ...);
+typedef void ReadLineFlushFunc(void *opaque);
+typedef void ReadLineFunc(void *opaque, const char *str,
+                          void *readline_opaque);
+typedef void ReadLineCompletionFunc(void *opaque,
                                     const char *cmdline);
 
 typedef struct ReadLineState {
@@ -35,7 +36,10 @@ typedef struct ReadLineState {
     void *readline_opaque;
     int read_password;
     char prompt[256];
-    Monitor *mon;
+
+    ReadLinePrintfFunc *printf_func;
+    ReadLineFlushFunc *flush_func;
+    void *opaque;
 } ReadLineState;
 
 void readline_add_completion(ReadLineState *rs, const char *str);
@@ -46,11 +50,13 @@ const char *readline_get_history(ReadLineState *rs, unsigned int index);
 void readline_handle_byte(ReadLineState *rs, int ch);
 
 void readline_start(ReadLineState *rs, const char *prompt, int read_password,
-                    ReadLineFunc *readline_func, void *opaque);
+                    ReadLineFunc *readline_func, void *readline_opaque);
 void readline_restart(ReadLineState *rs);
 void readline_show_prompt(ReadLineState *rs);
 
-ReadLineState *readline_init(Monitor *mon,
+ReadLineState *readline_init(ReadLinePrintfFunc *printf_func,
+                             ReadLineFlushFunc *flush_func,
+                             void *opaque,
                              ReadLineCompletionFunc *completion_finder);
 
 #endif /* !READLINE_H */
diff --git a/monitor.c b/monitor.c
index 845f608665..80456fbe5b 100644
--- a/monitor.c
+++ b/monitor.c
@@ -37,7 +37,7 @@
 #include "ui/qemu-spice.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
-#include "monitor/readline.h"
+#include "qemu/readline.h"
 #include "ui/console.h"
 #include "sysemu/blockdev.h"
 #include "audio/audio.h"
@@ -217,8 +217,8 @@ static const mon_cmd_t qmp_cmds[];
 Monitor *cur_mon;
 Monitor *default_mon;
 
-static void monitor_command_cb(Monitor *mon, const char *cmdline,
-                               void *opaque);
+static void monitor_command_cb(void *opaque, const char *cmdline,
+                               void *readline_opaque);
 
 static inline int qmp_cmd_mode(const Monitor *mon)
 {
@@ -4338,9 +4338,10 @@ static void monitor_find_completion_by_table(Monitor *mon,
     }
 }
 
-static void monitor_find_completion(Monitor *mon,
+static void monitor_find_completion(void *opaque,
                                     const char *cmdline)
 {
+    Monitor *mon = opaque;
     char *args[MAX_ARGS];
     int nb_args, len;
 
@@ -4751,8 +4752,11 @@ static void monitor_read(void *opaque, const uint8_t *buf, int size)
     cur_mon = old_mon;
 }
 
-static void monitor_command_cb(Monitor *mon, const char *cmdline, void *opaque)
+static void monitor_command_cb(void *opaque, const char *cmdline,
+                               void *readline_opaque)
 {
+    Monitor *mon = opaque;
+
     monitor_suspend(mon);
     handle_user_command(mon, cmdline);
     monitor_resume(mon);
@@ -4881,6 +4885,22 @@ static void sortcmdlist(void)
  * End:
  */
 
+/* These functions just adapt the readline interface in a typesafe way.  We
+ * could cast function pointers but that discards compiler checks.
+ */
+static void monitor_readline_printf(void *opaque, const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    monitor_vprintf(opaque, fmt, ap);
+    va_end(ap);
+}
+
+static void monitor_readline_flush(void *opaque)
+{
+    monitor_flush(opaque);
+}
+
 void monitor_init(CharDriverState *chr, int flags)
 {
     static int is_first_init = 1;
@@ -4898,7 +4918,10 @@ void monitor_init(CharDriverState *chr, int flags)
     mon->chr = chr;
     mon->flags = flags;
     if (flags & MONITOR_USE_READLINE) {
-        mon->rs = readline_init(mon, monitor_find_completion);
+        mon->rs = readline_init(monitor_readline_printf,
+                                monitor_readline_flush,
+                                mon,
+                                monitor_find_completion);
         monitor_read_command(mon, 0);
     }
 
@@ -4920,9 +4943,11 @@ void monitor_init(CharDriverState *chr, int flags)
         default_mon = mon;
 }
 
-static void bdrv_password_cb(Monitor *mon, const char *password, void *opaque)
+static void bdrv_password_cb(void *opaque, const char *password,
+                             void *readline_opaque)
 {
-    BlockDriverState *bs = opaque;
+    Monitor *mon = opaque;
+    BlockDriverState *bs = readline_opaque;
     int ret = 0;
 
     if (bdrv_set_key(bs, password) != 0) {
diff --git a/qapi-schema.json b/qapi-schema.json
index f27c48a285..05ced9d572 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -810,6 +810,8 @@
 #
 # @file: the filename of the backing device
 #
+# @node-name: #optional the name of the block driver node (Since 2.0)
+#
 # @ro: true if the backing device was open read-only
 #
 # @drv: the name of the block format used to open the backing device. As of
@@ -857,10 +859,9 @@
 #
 # Since: 0.14.0
 #
-# Notes: This interface is only found in @BlockInfo.
 ##
 { 'type': 'BlockDeviceInfo',
-  'data': { 'file': 'str', 'ro': 'bool', 'drv': 'str',
+  'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
@@ -1022,15 +1023,17 @@
 #
 # @stats:  A @BlockDeviceStats for the device.
 #
-# @parent: #optional This may point to the backing block device if this is a
-#          a virtual block device.  If it's a backing block, this will point
-#          to the backing file is one is present.
+# @parent: #optional This describes the file block device if it has one.
+#
+# @backing: #optional This describes the backing block device if it has one.
+#           (Since 2.0)
 #
 # Since: 0.14.0
 ##
 { 'type': 'BlockStats',
   'data': {'*device': 'str', 'stats': 'BlockDeviceStats',
-           '*parent': 'BlockStats'} }
+           '*parent': 'BlockStats',
+           '*backing': 'BlockStats'} }
 
 ##
 # @query-blockstats:
@@ -1675,7 +1678,11 @@
 # determine which ones are encrypted, set the passwords with this command, and
 # then start the guest with the @cont command.
 #
-# @device:   the name of the device to set the password on
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the block backend device to set the password on
+#
+# @node-name: #optional graph node name to set the password on (Since 2.0)
 #
 # @password: the password to use for the device
 #
@@ -1689,7 +1696,8 @@
 #
 # Since: 0.14.0
 ##
-{ 'command': 'block_passwd', 'data': {'device': 'str', 'password': 'str'} }
+{ 'command': 'block_passwd', 'data': {'*device': 'str',
+                                      '*node-name': 'str', 'password': 'str'} }
 
 ##
 # @balloon:
@@ -1716,7 +1724,11 @@
 #
 # Resize a block image while a guest is running.
 #
-# @device:  the name of the device to get the image resized
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to get the image resized
+#
+# @node-name: #optional graph node name to get the image resized (Since 2.0)
 #
 # @size:  new image size in bytes
 #
@@ -1725,7 +1737,9 @@
 #
 # Since: 0.14.0
 ##
-{ 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }}
+{ 'command': 'block_resize', 'data': { '*device': 'str',
+                                       '*node-name': 'str',
+                                       'size': 'int' }}
 
 ##
 # @NewImageMode
@@ -1747,18 +1761,25 @@
 ##
 # @BlockdevSnapshot
 #
-# @device:  the name of the device to generate the snapshot from.
+# Either @device or @node-name must be set but not both.
+#
+# @device: #optional the name of the device to generate the snapshot from.
+#
+# @node-name: #optional graph node name to generate the snapshot from (Since 2.0)
 #
 # @snapshot-file: the target of the new image. A new file will be created.
 #
+# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0)
+#
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 ##
 { 'type': 'BlockdevSnapshot',
-  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
-            '*mode': 'NewImageMode' } }
+  'data': { '*device': 'str', '*node-name': 'str',
+            'snapshot-file': 'str', '*snapshot-node-name': 'str',
+            '*format': 'str', '*mode': 'NewImageMode' } }
 
 ##
 # @BlockdevSnapshotInternal
@@ -1973,6 +1994,13 @@
 #                    user needs to complete the job with the block-job-complete
 #                    command after getting the ready event. (Since 2.0)
 #
+#                    If the base image is smaller than top, then the base image
+#                    will be resized to be the same size as top.  If top is
+#                    smaller than the base image, the base will not be
+#                    truncated.  If you want the base image size to match the
+#                    size of the smaller top, you can safely truncate it
+#                    yourself once the commit operation successfully completes.
+#
 #
 # @speed:  #optional the maximum speed, in bytes per second
 #
@@ -2009,6 +2037,17 @@
 { 'command': 'drive-backup', 'data': 'DriveBackup' }
 
 ##
+# @query-named-block-nodes
+#
+# Get the named block driver list
+#
+# Returns: the list of BlockDeviceInfo
+#
+# Since 2.0
+##
+{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] }
+
+##
 # @drive-mirror
 #
 # Start mirroring a block device's writes to a new destination.
@@ -4090,6 +4129,7 @@
 # @id:          #optional id by which the new block device can be referred to.
 #               This is a required option on the top level of blockdev-add, and
 #               currently not allowed on any other level.
+# @node-name:   #optional the name of a block driver state node (Since 2.0)
 # @discard:     #optional discard-related options (default: ignore)
 # @cache:       #optional cache-related options
 # @aio:         #optional AIO backend (default: threads)
@@ -4105,6 +4145,7 @@
 { 'type': 'BlockdevOptionsBase',
   'data': { 'driver': 'str',
             '*id': 'str',
+            '*node-name': 'str',
             '*discard': 'BlockdevDiscardOptions',
             '*cache': 'BlockdevCacheOptions',
             '*aio': 'BlockdevAioOptions',
@@ -4201,6 +4242,116 @@
             '*pass-discard-other': 'bool' } }
 
 ##
+# @BlkdebugEvent
+#
+# Trigger events supported by blkdebug.
+##
+{ 'enum': 'BlkdebugEvent',
+  'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table',
+            'l1_grow.activate_table', 'l2_load', 'l2_update',
+            'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write',
+            'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio',
+            'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read',
+            'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update',
+            'refblock_load', 'refblock_update', 'refblock_update_part',
+            'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write',
+            'refblock_alloc.write_blocks', 'refblock_alloc.write_table',
+            'refblock_alloc.switch_table', 'cluster_alloc',
+            'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
+            'flush_to_disk' ] }
+
+##
+# @BlkdebugInjectErrorOptions
+#
+# Describes a single error injection for blkdebug.
+#
+# @event:       trigger event
+#
+# @state:       #optional the state identifier blkdebug needs to be in to
+#               actually trigger the event; defaults to "any"
+#
+# @errno:       #optional error identifier (errno) to be returned; defaults to
+#               EIO
+#
+# @sector:      #optional specifies the sector index which has to be affected
+#               in order to actually trigger the event; defaults to "any
+#               sector"
+#
+# @once:        #optional disables further events after this one has been
+#               triggered; defaults to false
+#
+# @immediately: #optional fail immediately; defaults to false
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugInjectErrorOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            '*errno': 'int',
+            '*sector': 'int',
+            '*once': 'bool',
+            '*immediately': 'bool' } }
+
+##
+# @BlkdebugSetStateOptions
+#
+# Describes a single state-change event for blkdebug.
+#
+# @event:       trigger event
+#
+# @state:       #optional the current state identifier blkdebug needs to be in;
+#               defaults to "any"
+#
+# @new_state:   the state identifier blkdebug is supposed to assume if
+#               this event is triggered
+#
+# Since: 2.0
+##
+{ 'type': 'BlkdebugSetStateOptions',
+  'data': { 'event': 'BlkdebugEvent',
+            '*state': 'int',
+            'new_state': 'int' } }
+
+##
+# @BlockdevOptionsBlkdebug
+#
+# Driver specific block device options for blkdebug.
+#
+# @image:           underlying raw block device (or image file)
+#
+# @config:          #optional filename of the configuration file
+#
+# @align:           #optional required alignment for requests in bytes
+#
+# @inject-error:    #optional array of error injection descriptions
+#
+# @set-state:       #optional array of state-change descriptions
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkdebug',
+  'data': { 'image': 'BlockdevRef',
+            '*config': 'str',
+            '*align': 'int',
+            '*inject-error': ['BlkdebugInjectErrorOptions'],
+            '*set-state': ['BlkdebugSetStateOptions'] } }
+
+##
+# @BlockdevOptionsBlkverify
+#
+# Driver specific block device options for blkverify.
+#
+# @test:    block device to be tested
+#
+# @raw:     raw image used for verification
+#
+# Since: 2.0
+##
+{ 'type': 'BlockdevOptionsBlkverify',
+  'data': { 'test': 'BlockdevRef',
+            'raw': 'BlockdevRef' } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.
@@ -4224,10 +4375,8 @@
 # TODO sheepdog: Wait for structured options
 # TODO ssh: Should take InetSocketAddress for 'host'?
       'vvfat':      'BlockdevOptionsVVFAT',
-
-# TODO blkdebug: Wait for structured options
-# TODO blkverify: Wait for structured options
-
+      'blkdebug':   'BlockdevOptionsBlkdebug',
+      'blkverify':  'BlockdevOptionsBlkverify',
       'bochs':      'BlockdevOptionsGenericFormat',
       'cloop':      'BlockdevOptionsGenericFormat',
       'cow':        'BlockdevOptionsGenericCOWFormat',
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 4e9c6e9b6e..ce61f30d6e 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -536,11 +536,11 @@ support of multiple VM snapshots.
 Supported options:
 @table @code
 @item compat
-Determines the qcow2 version to use. @code{compat=0.10} uses the traditional
-image format that can be read by any QEMU since 0.10 (this is the default).
+Determines the qcow2 version to use. @code{compat=0.10} uses the
+traditional image format that can be read by any QEMU since 0.10.
 @code{compat=1.1} enables image format extensions that only QEMU 1.1 and
-newer understand. Amongst others, this includes zero clusters, which allow
-efficient copy-on-read for sparse images.
+newer understand (this is the default). Amongst others, this includes
+zero clusters, which allow efficient copy-on-read for sparse images.
 
 @item backing_file
 File name of a base image (see @option{create} subcommand)
diff --git a/qemu-img.texi b/qemu-img.texi
index 1bba91efde..526d56a458 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -57,7 +57,9 @@ indicates that target image must be compressed (qcow format only)
 @item -h
 with or without a command shows help and lists the supported formats
 @item -p
-display progress bar (convert and rebase commands only)
+display progress bar (compare, convert and rebase commands only).
+If the @var{-p} option is not used for a command that supports it, the
+progress is reported when the process receives a @code{SIGUSR1} signal.
 @item -q
 Quiet mode - do not print any output (except errors). There's no progress bar
 in case both @var{-q} and @var{-p} options are used.
@@ -140,7 +142,12 @@ it doesn't need to be specified separately in this case.
 
 @item commit [-f @var{fmt}] [-t @var{cache}] @var{filename}
 
-Commit the changes recorded in @var{filename} in its base image.
+Commit the changes recorded in @var{filename} in its base image or backing file.
+If the backing file is smaller than the snapshot, then the backing file will be
+resized to be the same size as the snapshot.  If the snapshot is smaller than
+the backing file, the backing file will not be truncated.  If you want the
+backing file to match the size of the smaller snapshot, you can safely truncate
+it yourself once the commit operation successfully completes.
 
 @item compare [-f @var{fmt}] [-F @var{fmt}] [-p] [-s] [-q] @var{filename1} @var{filename2}
 
@@ -391,11 +398,11 @@ support of multiple VM snapshots.
 Supported options:
 @table @code
 @item compat
-Determines the qcow2 version to use. @code{compat=0.10} uses the traditional
-image format that can be read by any QEMU since 0.10 (this is the default).
+Determines the qcow2 version to use. @code{compat=0.10} uses the
+traditional image format that can be read by any QEMU since 0.10.
 @code{compat=1.1} enables image format extensions that only QEMU 1.1 and
-newer understand. Amongst others, this includes zero clusters, which allow
-efficient copy-on-read for sparse images.
+newer understand (this is the default). Amongst others, this includes zero
+clusters, which allow efficient copy-on-read for sparse images.
 
 @item backing_file
 File name of a base image (see @option{create} subcommand)
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 85e4982bd8..f1de24c91c 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -12,6 +12,7 @@
 #include "block/block_int.h"
 #include "block/qapi.h"
 #include "qemu/main-loop.h"
+#include "qemu/timer.h"
 
 #define CMD_NOFILE_OK   0x01
 
@@ -94,6 +95,21 @@ static const cmdinfo_t *find_command(const char *cmd)
     return NULL;
 }
 
+/* Invoke fn() for commands with a matching prefix */
+void qemuio_complete_command(const char *input,
+                             void (*fn)(const char *cmd, void *opaque),
+                             void *opaque)
+{
+    cmdinfo_t *ct;
+    size_t input_len = strlen(input);
+
+    for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) {
+        if (strncmp(input, ct->name, input_len) == 0) {
+            fn(ct->name, opaque);
+        }
+    }
+}
+
 static char **breakline(char *input, int *count)
 {
     int c = 0;
@@ -2038,6 +2054,46 @@ static const cmdinfo_t abort_cmd = {
        .oneline        = "simulate a program crash using abort(3)",
 };
 
+static void sleep_cb(void *opaque)
+{
+    bool *expired = opaque;
+    *expired = true;
+}
+
+static int sleep_f(BlockDriverState *bs, int argc, char **argv)
+{
+    char *endptr;
+    long ms;
+    struct QEMUTimer *timer;
+    bool expired = false;
+
+    ms = strtol(argv[1], &endptr, 0);
+    if (ms < 0 || *endptr != '\0') {
+        printf("%s is not a valid number\n", argv[1]);
+        return 0;
+    }
+
+    timer = timer_new_ns(QEMU_CLOCK_HOST, sleep_cb, &expired);
+    timer_mod(timer, qemu_clock_get_ns(QEMU_CLOCK_HOST) + SCALE_MS * ms);
+
+    while (!expired) {
+        main_loop_wait(false);
+    }
+
+    timer_free(timer);
+
+    return 0;
+}
+
+static const cmdinfo_t sleep_cmd = {
+       .name           = "sleep",
+       .argmin         = 1,
+       .argmax         = 1,
+       .cfunc          = sleep_f,
+       .flags          = CMD_NOFILE_OK,
+       .oneline        = "waits for the given value in milliseconds",
+};
+
 static void help_oneline(const char *cmd, const cmdinfo_t *ct)
 {
     if (cmd) {
@@ -2151,4 +2207,5 @@ static void __attribute((constructor)) init_qemuio_commands(void)
     qemuio_add_command(&resume_cmd);
     qemuio_add_command(&wait_break_cmd);
     qemuio_add_command(&abort_cmd);
+    qemuio_add_command(&sleep_cmd);
 }
diff --git a/qemu-io.c b/qemu-io.c
index 3b3340ab1b..d6690289b8 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -18,6 +18,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
+#include "qemu/readline.h"
 #include "block/block_int.h"
 #include "trace/control.h"
 
@@ -32,6 +33,8 @@ extern int qemuio_misalign;
 static int ncmdline;
 static char **cmdline;
 
+static ReadLineState *readline_state;
+
 static int close_f(BlockDriverState *bs, int argc, char **argv)
 {
     bdrv_unref(bs);
@@ -56,7 +59,7 @@ static int openfile(char *name, int flags, int growable, QDict *opts)
     }
 
     if (growable) {
-        if (bdrv_file_open(&qemuio_bs, name, opts, flags, &local_err)) {
+        if (bdrv_file_open(&qemuio_bs, name, NULL, opts, flags, &local_err)) {
             fprintf(stderr, "%s: can't open device %s: %s\n", progname, name,
                     error_get_pretty(local_err));
             error_free(local_err);
@@ -160,11 +163,13 @@ static int open_f(BlockDriverState *bs, int argc, char **argv)
         flags |= BDRV_O_RDWR;
     }
 
-    if (optind != argc - 1) {
+    if (optind == argc - 1) {
+        return openfile(argv[optind], flags, growable, opts);
+    } else if (optind == argc) {
+        return openfile(NULL, flags, growable, opts);
+    } else {
         return qemuio_command_usage(&open_cmd);
     }
-
-    return openfile(argv[optind], flags, growable, opts);
 }
 
 static int quit_f(BlockDriverState *bs, int argc, char **argv)
@@ -203,14 +208,6 @@ static void usage(const char *name)
     name);
 }
 
-
-#if defined(ENABLE_READLINE)
-# include <readline/history.h>
-# include <readline/readline.h>
-#elif defined(ENABLE_EDITLINE)
-# include <histedit.h>
-#endif
-
 static char *get_prompt(void)
 {
     static char prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ];
@@ -222,52 +219,53 @@ static char *get_prompt(void)
     return prompt;
 }
 
-#if defined(ENABLE_READLINE)
-static char *fetchline(void)
+static void readline_printf_func(void *opaque, const char *fmt, ...)
 {
-    char *line = readline(get_prompt());
-    if (line && *line) {
-        add_history(line);
-    }
-    return line;
+    va_list ap;
+    va_start(ap, fmt);
+    vprintf(fmt, ap);
+    va_end(ap);
 }
-#elif defined(ENABLE_EDITLINE)
-static char *el_get_prompt(EditLine *e)
+
+static void readline_flush_func(void *opaque)
 {
-    return get_prompt();
+    fflush(stdout);
 }
 
-static char *fetchline(void)
+static void readline_func(void *opaque, const char *str, void *readline_opaque)
 {
-    static EditLine *el;
-    static History *hist;
-    HistEvent hevent;
-    char *line;
-    int count;
-
-    if (!el) {
-        hist = history_init();
-        history(hist, &hevent, H_SETSIZE, 100);
-        el = el_init(progname, stdin, stdout, stderr);
-        el_source(el, NULL);
-        el_set(el, EL_SIGNAL, 1);
-        el_set(el, EL_PROMPT, el_get_prompt);
-        el_set(el, EL_HIST, history, (const char *)hist);
-    }
-    line = strdup(el_gets(el, &count));
-    if (line) {
-        if (count > 0) {
-            line[count-1] = '\0';
-        }
-        if (*line) {
-            history(hist, &hevent, H_ENTER, line);
+    char **line = readline_opaque;
+    *line = g_strdup(str);
+}
+
+static void completion_match(const char *cmd, void *opaque)
+{
+    readline_add_completion(readline_state, cmd);
+}
+
+static void readline_completion_func(void *opaque, const char *str)
+{
+    readline_set_completion_index(readline_state, strlen(str));
+    qemuio_complete_command(str, completion_match, NULL);
+}
+
+static char *fetchline_readline(void)
+{
+    char *line = NULL;
+
+    readline_start(readline_state, get_prompt(), 0, readline_func, &line);
+    while (!line) {
+        int ch = getchar();
+        if (ch == EOF) {
+            break;
         }
+        readline_handle_byte(readline_state, ch);
     }
     return line;
 }
-#else
-# define MAXREADLINESZ 1024
-static char *fetchline(void)
+
+#define MAXREADLINESZ 1024
+static char *fetchline_fgets(void)
 {
     char *p, *line = g_malloc(MAXREADLINESZ);
 
@@ -283,7 +281,15 @@ static char *fetchline(void)
 
     return line;
 }
-#endif
+
+static char *fetchline(void)
+{
+    if (readline_state) {
+        return fetchline_readline();
+    } else {
+        return fetchline_fgets();
+    }
+}
 
 static void prep_fetchline(void *opaque)
 {
@@ -339,6 +345,11 @@ static void add_user_command(char *optarg)
     cmdline[ncmdline-1] = optarg;
 }
 
+static void reenable_tty_echo(void)
+{
+    qemu_set_tty_echo(STDIN_FILENO, true);
+}
+
 int main(int argc, char **argv)
 {
     int readonly = 0;
@@ -435,6 +446,15 @@ int main(int argc, char **argv)
     qemuio_add_command(&open_cmd);
     qemuio_add_command(&close_cmd);
 
+    if (isatty(STDIN_FILENO)) {
+        readline_state = readline_init(readline_printf_func,
+                                       readline_flush_func,
+                                       NULL,
+                                       readline_completion_func);
+        qemu_set_tty_echo(STDIN_FILENO, false);
+        atexit(reenable_tty_echo);
+    }
+
     /* open the device */
     if (!readonly) {
         flags |= BDRV_O_RDWR;
@@ -453,5 +473,6 @@ int main(int argc, char **argv)
     if (qemuio_bs) {
         bdrv_unref(qemuio_bs);
     }
+    g_free(readline_state);
     return 0;
 }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 02cc815bc5..cce6b81da4 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -931,7 +931,7 @@ EQMP
 
     {
         .name       = "block_resize",
-        .args_type  = "device:B,size:o",
+        .args_type  = "device:s?,node-name:s?,size:o",
         .mhandler.cmd_new = qmp_marshal_input_block_resize,
     },
 
@@ -944,6 +944,7 @@ Resize a block image while a guest is running.
 Arguments:
 
 - "device": the device's ID, must be unique (json-string)
+- "node-name": the node name in the block driver state graph (json-string)
 - "size": new size
 
 Example:
@@ -965,6 +966,45 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_commit,
     },
 
+SQMP
+block-commit
+------------
+
+Live commit of data from overlay image nodes into backing nodes - i.e., writes
+data between 'top' and 'base' into 'base'.
+
+Arguments:
+
+- "device": The device's ID, must be unique (json-string)
+- "base": The file name of the backing image to write data into.
+          If not specified, this is the deepest backing image
+          (json-string, optional)
+- "top":  The file name of the backing image within the image chain,
+          which contains the topmost data to be committed down.
+
+          If top == base, that is an error.
+          If top == active, the job will not be completed by itself,
+          user needs to complete the job with the block-job-complete
+          command after getting the ready event. (Since 2.0)
+
+          If the base image is smaller than top, then the base image
+          will be resized to be the same size as top.  If top is
+          smaller than the base image, the base will not be
+          truncated.  If you want the base image size to match the
+          size of the smaller top, you can safely truncate it
+          yourself once the commit operation successfully completes.
+          (json-string)
+- "speed":  the maximum speed, in bytes per second (json-int, optional)
+
+
+Example:
+
+-> { "execute": "block-commit", "arguments": { "device": "virtio0",
+                                              "top": "/tmp/snap1.qcow2" } }
+<- { "return": {} }
+
+EQMP
+
     {
         .name       = "drive-backup",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
@@ -1088,7 +1128,9 @@ actions array:
     - "data": a dictionary.  The contents depend on the value
       of "type".  When "type" is "blockdev-snapshot-sync":
       - "device": device name to snapshot (json-string)
+      - "node-name": graph node name to snapshot (json-string)
       - "snapshot-file": name of new image file (json-string)
+      - "snapshot-node-name": graph node name of the new snapshot (json-string)
       - "format": format of new image (json-string, optional)
       - "mode": whether and how QEMU should create the snapshot file
         (NewImageMode, optional, default "absolute-paths")
@@ -1103,6 +1145,11 @@ Example:
          { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0",
                                          "snapshot-file": "/some/place/my-image",
                                          "format": "qcow2" } },
+         { 'type': 'blockdev-snapshot-sync', 'data' : { "node-name": "myfile",
+                                         "snapshot-file": "/some/place/my-image2",
+                                         "snapshot-node-name": "node3432",
+                                         "mode": "existing",
+                                         "format": "qcow2" } },
          { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1",
                                          "snapshot-file": "/some/place/my-image2",
                                          "mode": "existing",
@@ -1116,7 +1163,7 @@ EQMP
 
     {
         .name       = "blockdev-snapshot-sync",
-        .args_type  = "device:B,snapshot-file:s,format:s?,mode:s?",
+        .args_type  = "device:s?,node-name:s?,snapshot-file:s,snapshot-node-name:s?,format:s?,mode:s?",
         .mhandler.cmd_new = qmp_marshal_input_blockdev_snapshot_sync,
     },
 
@@ -1133,7 +1180,9 @@ snapshot image, default is qcow2.
 Arguments:
 
 - "device": device name to snapshot (json-string)
+- "node-name": graph node name to snapshot (json-string)
 - "snapshot-file": name of new image file (json-string)
+- "snapshot-node-name": graph node name of the new snapshot (json-string)
 - "mode": whether and how QEMU should create the snapshot file
   (NewImageMode, optional, default "absolute-paths")
 - "format": format of new image (json-string, optional)
@@ -1503,7 +1552,7 @@ EQMP
 
     {
         .name       = "block_passwd",
-        .args_type  = "device:B,password:s",
+        .args_type  = "device:s?,node-name:s?,password:s",
         .mhandler.cmd_new = qmp_marshal_input_block_passwd,
     },
 
@@ -1516,6 +1565,7 @@ Set the password of encrypted block devices.
 Arguments:
 
 - "device": device name (json-string)
+- "node-name": name in the block driver state graph (json-string)
 - "password": password (json-string)
 
 Example:
@@ -3346,3 +3396,64 @@ Example (2):
 <- { "return": {} }
 
 EQMP
+
+    {
+        .name       = "query-named-block-nodes",
+        .args_type  = "",
+        .mhandler.cmd_new = qmp_marshal_input_query_named_block_nodes,
+    },
+
+SQMP
+@query-named-block-nodes
+------------------------
+
+Return a list of BlockDeviceInfo for all the named block driver nodes
+
+Example:
+
+-> { "execute": "query-named-block-nodes" }
+<- { "return": [ { "ro":false,
+                   "drv":"qcow2",
+                   "encrypted":false,
+                   "file":"disks/test.qcow2",
+                   "node-name": "my-node",
+                   "backing_file_depth":1,
+                   "bps":1000000,
+                   "bps_rd":0,
+                   "bps_wr":0,
+                   "iops":1000000,
+                   "iops_rd":0,
+                   "iops_wr":0,
+                   "bps_max": 8000000,
+                   "bps_rd_max": 0,
+                   "bps_wr_max": 0,
+                   "iops_max": 0,
+                   "iops_rd_max": 0,
+                   "iops_wr_max": 0,
+                   "iops_size": 0,
+                   "image":{
+                      "filename":"disks/test.qcow2",
+                      "format":"qcow2",
+                      "virtual-size":2048000,
+                      "backing_file":"base.qcow2",
+                      "full-backing-filename":"disks/base.qcow2",
+                      "backing-filename-format:"qcow2",
+                      "snapshots":[
+                         {
+                            "id": "1",
+                            "name": "snapshot1",
+                            "vm-state-size": 0,
+                            "date-sec": 10000200,
+                            "date-nsec": 12,
+                            "vm-clock-sec": 206,
+                            "vm-clock-nsec": 30
+                         }
+                      ],
+                      "backing-image":{
+                          "filename":"disks/base.qcow2",
+                          "format":"qcow2",
+                          "virtual-size":2048000
+                      }
+                   } } ] }
+
+EQMP
diff --git a/qobject/qdict.c b/qobject/qdict.c
index 17e14f08b1..a3924f24bd 100644
--- a/qobject/qdict.c
+++ b/qobject/qdict.c
@@ -477,7 +477,43 @@ static void qdict_destroy_obj(QObject *obj)
     g_free(qdict);
 }
 
-static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix)
+static void qdict_flatten_qdict(QDict *qdict, QDict *target,
+                                const char *prefix);
+
+static void qdict_flatten_qlist(QList *qlist, QDict *target, const char *prefix)
+{
+    QObject *value;
+    const QListEntry *entry;
+    char *new_key;
+    int i;
+
+    /* This function is never called with prefix == NULL, i.e., it is always
+     * called from within qdict_flatten_q(list|dict)(). Therefore, it does not
+     * need to remove list entries during the iteration (the whole list will be
+     * deleted eventually anyway from qdict_flatten_qdict()). */
+    assert(prefix);
+
+    entry = qlist_first(qlist);
+
+    for (i = 0; entry; entry = qlist_next(entry), i++) {
+        value = qlist_entry_obj(entry);
+        new_key = g_strdup_printf("%s.%i", prefix, i);
+
+        if (qobject_type(value) == QTYPE_QDICT) {
+            qdict_flatten_qdict(qobject_to_qdict(value), target, new_key);
+        } else if (qobject_type(value) == QTYPE_QLIST) {
+            qdict_flatten_qlist(qobject_to_qlist(value), target, new_key);
+        } else {
+            /* All other types are moved to the target unchanged. */
+            qobject_incref(value);
+            qdict_put_obj(target, new_key, value);
+        }
+
+        g_free(new_key);
+    }
+}
+
+static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix)
 {
     QObject *value;
     const QDictEntry *entry, *next;
@@ -500,8 +536,12 @@ static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix)
         if (qobject_type(value) == QTYPE_QDICT) {
             /* Entries of QDicts are processed recursively, the QDict object
              * itself disappears. */
-            qdict_do_flatten(qobject_to_qdict(value), target,
-                             new_key ? new_key : entry->key);
+            qdict_flatten_qdict(qobject_to_qdict(value), target,
+                                new_key ? new_key : entry->key);
+            delete = true;
+        } else if (qobject_type(value) == QTYPE_QLIST) {
+            qdict_flatten_qlist(qobject_to_qlist(value), target,
+                                new_key ? new_key : entry->key);
             delete = true;
         } else if (prefix) {
             /* All other objects are moved to the target unchanged. */
@@ -526,12 +566,14 @@ static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix)
 
 /**
  * qdict_flatten(): For each nested QDict with key x, all fields with key y
- * are moved to this QDict and their key is renamed to "x.y". This operation
- * is applied recursively for nested QDicts.
+ * are moved to this QDict and their key is renamed to "x.y". For each nested
+ * QList with key x, the field at index y is moved to this QDict with the key
+ * "x.y" (i.e., the reverse of what qdict_array_split() does).
+ * This operation is applied recursively for nested QDicts and QLists.
  */
 void qdict_flatten(QDict *qdict)
 {
-    qdict_do_flatten(qdict, qdict, NULL);
+    qdict_flatten_qdict(qdict, qdict, NULL);
 }
 
 /* extract all the src QDict entries starting by start into dst */
@@ -554,3 +596,40 @@ void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start)
         entry = next;
     }
 }
+
+/**
+ * qdict_array_split(): This function moves array-like elements of a QDict into
+ * a new QList of QDicts. Every entry in the original QDict with a key prefixed
+ * "%u.", where %u designates an unsigned integer starting at 0 and
+ * incrementally counting up, will be moved to a new QDict at index %u in the
+ * output QList with the key prefix removed. The function terminates when there
+ * is no entry in the QDict with a prefix directly (incrementally) following the
+ * last one.
+ * Example: {"0.a": 42, "0.b": 23, "1.x": 0, "3.y": 1, "o.o": 7}
+ *      (or {"1.x": 0, "3.y": 1, "0.a": 42, "o.o": 7, "0.b": 23})
+ *       => [{"a": 42, "b": 23}, {"x": 0}]
+ *      and {"3.y": 1, "o.o": 7} (remainder of the old QDict)
+ */
+void qdict_array_split(QDict *src, QList **dst)
+{
+    unsigned i;
+
+    *dst = qlist_new();
+
+    for (i = 0; i < UINT_MAX; i++) {
+        QDict *subqdict;
+        char prefix[32];
+        size_t snprintf_ret;
+
+        snprintf_ret = snprintf(prefix, 32, "%u.", i);
+        assert(snprintf_ret < 32);
+
+        qdict_extract_subqdict(src, &subqdict, prefix);
+        if (!qdict_size(subqdict)) {
+            QDECREF(subqdict);
+            break;
+        }
+
+        qlist_append_obj(*dst, QOBJECT(subqdict));
+    }
+}
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 750e9fb552..9b3de4c7c3 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -247,7 +247,7 @@ def c_var(name, protect=True):
                      'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not',
                      'not_eq', 'or', 'or_eq', 'xor', 'xor_eq'])
     # namespace pollution:
-    polluted_words = set(['unix'])
+    polluted_words = set(['unix', 'errno'])
     if protect and (name in c89_words | c99_words | c11_words | gcc_words | cpp_words | polluted_words):
         return "q_" + name
     return name.replace('-', '_').lstrip("*")
diff --git a/tests/Makefile b/tests/Makefile
index 0aaf657be5..fd36eee641 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -151,7 +151,7 @@ tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a
 tests/check-qlist$(EXESUF): tests/check-qlist.o libqemuutil.a
 tests/check-qfloat$(EXESUF): tests/check-qfloat.o libqemuutil.a
 tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a
-tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a
+tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(qom-core-obj) libqemuutil.a libqemustub.a
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a
diff --git a/tests/check-qdict.c b/tests/check-qdict.c
index dc5f05a85f..7a7461b0b2 100644
--- a/tests/check-qdict.c
+++ b/tests/check-qdict.c
@@ -227,6 +227,160 @@ static void qdict_iterapi_test(void)
     QDECREF(tests_dict);
 }
 
+static void qdict_flatten_test(void)
+{
+    QList *list1 = qlist_new();
+    QList *list2 = qlist_new();
+    QDict *dict1 = qdict_new();
+    QDict *dict2 = qdict_new();
+    QDict *dict3 = qdict_new();
+
+    /*
+     * Test the flattening of
+     *
+     * {
+     *     "e": [
+     *         42,
+     *         [
+     *             23,
+     *             66,
+     *             {
+     *                 "a": 0,
+     *                 "b": 1
+     *             }
+     *         ]
+     *     ],
+     *     "f": {
+     *         "c": 2,
+     *         "d": 3,
+     *     },
+     *     "g": 4
+     * }
+     *
+     * to
+     *
+     * {
+     *     "e.0": 42,
+     *     "e.1.0": 23,
+     *     "e.1.1": 66,
+     *     "e.1.2.a": 0,
+     *     "e.1.2.b": 1,
+     *     "f.c": 2,
+     *     "f.d": 3,
+     *     "g": 4
+     * }
+     */
+
+    qdict_put(dict1, "a", qint_from_int(0));
+    qdict_put(dict1, "b", qint_from_int(1));
+
+    qlist_append_obj(list1, QOBJECT(qint_from_int(23)));
+    qlist_append_obj(list1, QOBJECT(qint_from_int(66)));
+    qlist_append_obj(list1, QOBJECT(dict1));
+    qlist_append_obj(list2, QOBJECT(qint_from_int(42)));
+    qlist_append_obj(list2, QOBJECT(list1));
+
+    qdict_put(dict2, "c", qint_from_int(2));
+    qdict_put(dict2, "d", qint_from_int(3));
+    qdict_put_obj(dict3, "e", QOBJECT(list2));
+    qdict_put_obj(dict3, "f", QOBJECT(dict2));
+    qdict_put(dict3, "g", qint_from_int(4));
+
+    qdict_flatten(dict3);
+
+    g_assert(qdict_get_int(dict3, "e.0") == 42);
+    g_assert(qdict_get_int(dict3, "e.1.0") == 23);
+    g_assert(qdict_get_int(dict3, "e.1.1") == 66);
+    g_assert(qdict_get_int(dict3, "e.1.2.a") == 0);
+    g_assert(qdict_get_int(dict3, "e.1.2.b") == 1);
+    g_assert(qdict_get_int(dict3, "f.c") == 2);
+    g_assert(qdict_get_int(dict3, "f.d") == 3);
+    g_assert(qdict_get_int(dict3, "g") == 4);
+
+    g_assert(qdict_size(dict3) == 8);
+
+    QDECREF(dict3);
+}
+
+static void qdict_array_split_test(void)
+{
+    QDict *test_dict = qdict_new();
+    QDict *dict1, *dict2;
+    QList *test_list;
+
+    /*
+     * Test the split of
+     *
+     * {
+     *     "1.x": 0,
+     *     "3.y": 1,
+     *     "0.a": 42,
+     *     "o.o": 7,
+     *     "0.b": 23
+     * }
+     *
+     * to
+     *
+     * [
+     *     {
+     *         "a": 42,
+     *         "b": 23
+     *     },
+     *     {
+     *         "x": 0
+     *     }
+     * ]
+     *
+     * and
+     *
+     * {
+     *     "3.y": 1,
+     *     "o.o": 7
+     * }
+     *
+     * (remaining in the old QDict)
+     *
+     * This example is given in the comment of qdict_array_split().
+     */
+
+    qdict_put(test_dict, "1.x", qint_from_int(0));
+    qdict_put(test_dict, "3.y", qint_from_int(1));
+    qdict_put(test_dict, "0.a", qint_from_int(42));
+    qdict_put(test_dict, "o.o", qint_from_int(7));
+    qdict_put(test_dict, "0.b", qint_from_int(23));
+
+    qdict_array_split(test_dict, &test_list);
+
+    dict1 = qobject_to_qdict(qlist_pop(test_list));
+    dict2 = qobject_to_qdict(qlist_pop(test_list));
+
+    g_assert(dict1);
+    g_assert(dict2);
+    g_assert(qlist_empty(test_list));
+
+    QDECREF(test_list);
+
+    g_assert(qdict_get_int(dict1, "a") == 42);
+    g_assert(qdict_get_int(dict1, "b") == 23);
+
+    g_assert(qdict_size(dict1) == 2);
+
+    QDECREF(dict1);
+
+    g_assert(qdict_get_int(dict2, "x") == 0);
+
+    g_assert(qdict_size(dict2) == 1);
+
+    QDECREF(dict2);
+
+    g_assert(qdict_get_int(test_dict, "3.y") == 1);
+    g_assert(qdict_get_int(test_dict, "o.o") == 7);
+
+    g_assert(qdict_size(test_dict) == 2);
+
+    QDECREF(test_dict);
+}
+
 /*
  * Errors test-cases
  */
@@ -365,6 +519,8 @@ int main(int argc, char **argv)
     g_test_add_func("/public/del", qdict_del_test);
     g_test_add_func("/public/to_qdict", qobject_to_qdict_test);
     g_test_add_func("/public/iterapi", qdict_iterapi_test);
+    g_test_add_func("/public/flatten", qdict_flatten_test);
+    g_test_add_func("/public/array_split", qdict_array_split_test);
 
     g_test_add_func("/errors/put_exists", qdict_put_exists_test);
     g_test_add_func("/errors/get_not_exists", qdict_get_not_exists_test);
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 38b5b178d0..37096dcc13 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -518,7 +518,6 @@ static void fuzz_registers(void)
 int main(int argc, char **argv)
 {
     const char *arch = qtest_get_arch();
-    char *cmdline;
     int fd;
     int ret;
 
@@ -538,9 +537,7 @@ int main(int argc, char **argv)
     /* Run the tests */
     g_test_init(&argc, &argv, NULL);
 
-    cmdline = g_strdup_printf("-vnc none ");
-
-    qtest_start(cmdline);
+    qtest_start(NULL);
     qtest_irq_intercept_in(global_qtest, "ioapic");
     qtest_add_func("/fdc/cmos", test_cmos);
     qtest_add_func("/fdc/no_media_on_start", test_no_media_on_start);
diff --git a/tests/ide-test.c b/tests/ide-test.c
index d5cec5a1fc..4a0d97f197 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -380,7 +380,6 @@ static void test_bmdma_no_busmaster(void)
 static void test_bmdma_setup(void)
 {
     ide_test_start(
-        "-vnc none "
         "-drive file=%s,if=ide,serial=%s,cache=writeback "
         "-global ide-hd.ver=%s",
         tmp_path, "testdisk", "version");
@@ -410,7 +409,6 @@ static void test_identify(void)
     int ret;
 
     ide_test_start(
-        "-vnc none "
         "-drive file=%s,if=ide,serial=%s,cache=writeback "
         "-global ide-hd.ver=%s",
         tmp_path, "testdisk", "version");
@@ -455,7 +453,6 @@ static void test_flush(void)
     uint8_t data;
 
     ide_test_start(
-        "-vnc none "
         "-drive file=blkdebug::%s,if=ide,cache=writeback",
         tmp_path);
 
diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017
index aba3faf712..3af3cdfbc3 100755
--- a/tests/qemu-iotests/017
+++ b/tests/qemu-iotests/017
@@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
 
 TEST_OFFSETS="0 4294967296"
 
diff --git a/tests/qemu-iotests/018 b/tests/qemu-iotests/018
index 15fcfe5670..6f7f0545d0 100755
--- a/tests/qemu-iotests/018
+++ b/tests/qemu-iotests/018
@@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
 
 TEST_OFFSETS="0 4294967296"
 
diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019
index 5bb18d0c0a..b43e70f3cb 100755
--- a/tests/qemu-iotests/019
+++ b/tests/qemu-iotests/019
@@ -47,6 +47,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" \
+                     "subformat=twoGbMaxExtentFlat" \
+                     "subformat=twoGbMaxExtentSparse"
 
 TEST_OFFSETS="0 4294967296"
 CLUSTER_SIZE=65536
diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020
index b3c86d844e..73a0429481 100755
--- a/tests/qemu-iotests/020
+++ b/tests/qemu-iotests/020
@@ -45,6 +45,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" \
+                     "subformat=twoGbMaxExtentFlat" \
+                     "subformat=twoGbMaxExtentSparse"
 
 TEST_OFFSETS="0 4294967296"
 
diff --git a/tests/qemu-iotests/034 b/tests/qemu-iotests/034
index 67f1959690..7349789583 100755
--- a/tests/qemu-iotests/034
+++ b/tests/qemu-iotests/034
@@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" \
+                     "subformat=twoGbMaxExtentFlat" \
+                     "subformat=twoGbMaxExtentSparse"
 
 CLUSTER_SIZE=4k
 size=128M
diff --git a/tests/qemu-iotests/037 b/tests/qemu-iotests/037
index 743bae33d3..e444349e6d 100755
--- a/tests/qemu-iotests/037
+++ b/tests/qemu-iotests/037
@@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" \
+                     "subformat=twoGbMaxExtentFlat" \
+                     "subformat=twoGbMaxExtentSparse"
 
 CLUSTER_SIZE=4k
 size=128M
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index c2cadba2fc..d0c5173626 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -222,7 +222,7 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) qququiquit
 
 Testing: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2
-QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Can't use 'qcow2' as a block driver for the protocol level
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename'
 
 
 === Parsing protocol from file name ===
diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index 65bea1d6c6..2d604d3a91 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -42,6 +42,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt vmdk
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" \
+                     "subformat=twoGbMaxExtentFlat" \
+                     "subformat=twoGbMaxExtentSparse"
 
 capacity_offset=16
 granularity_offset=20
@@ -95,10 +98,23 @@ EOF
 _img_info
 
 echo
+echo "=== Testing truncated sparse ==="
+IMGOPTS="subformat=monolithicSparse" _make_test_img 100G
+truncate -s 10M $TEST_IMG
+_img_info
+
+echo
 echo "=== Testing version 3 ==="
 _use_sample_img iotest-version3.vmdk.bz2
 _img_info
 
+echo
+echo "=== Testing 4TB monolithicFlat creation and IO ==="
+IMGOPTS="subformat=monolithicFlat" _make_test_img 4T
+_img_info
+$QEMU_IO -c "write -P 0xa 900G 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -v 900G 1024" "$TEST_IMG" | _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index 16ab7c6c1f..4ffeb54710 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2043,8 +2043,87 @@ qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Invalid extent lines:
 RW 12582912 VMFS "dummy.IMGFMT" 1
 
 
+=== Testing truncated sparse ===
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=107374182400
+qemu-img: File truncated, expecting at least 13172736 bytes
+qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Wrong medium type
+
 === Testing version 3 ===
 image: TEST_DIR/iotest-version3.IMGFMT
 file format: IMGFMT
 virtual size: 1.0G (1073741824 bytes)
+
+=== Testing 4TB monolithicFlat creation and IO ===
+Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104
+image: TEST_DIR/iotest-version3.IMGFMT
+file format: IMGFMT
+virtual size: 4.0T (4398046511104 bytes)
+wrote 512/512 bytes at offset 966367641600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+e100000000:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000010:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000020:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000030:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000040:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000050:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000060:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000070:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000080:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000090:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000000a0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000000b0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000000c0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000000d0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000000e0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000000f0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000100:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000110:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000120:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000130:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000140:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000150:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000160:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000170:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000180:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000190:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000001a0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000001b0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000001c0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000001d0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000001e0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e1000001f0:  0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a  ................
+e100000200:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000210:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000220:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000230:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000240:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000250:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000260:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000270:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000280:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000290:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000002a0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000002b0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000002c0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000002d0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000002e0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000002f0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000300:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000310:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000320:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000330:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000340:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000350:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000360:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000370:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000380:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e100000390:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000003a0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000003b0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000003c0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000003d0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000003e0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+e1000003f0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+read 1024/1024 bytes at offset 966367641600
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063
index 2ab8f20e02..77503a2984 100755
--- a/tests/qemu-iotests/063
+++ b/tests/qemu-iotests/063
@@ -44,6 +44,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt qcow qcow2 vmdk qed raw
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" \
+                     "subformat=twoGbMaxExtentFlat" \
+                     "subformat=twoGbMaxExtentSparse"
 
 _make_test_img 4M
 
diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069
index 3042803a81..50347d91d2 100755
--- a/tests/qemu-iotests/069
+++ b/tests/qemu-iotests/069
@@ -41,6 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt cow qed qcow qcow2 vmdk
 _supported_proto generic
 _supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
 
 IMG_SIZE=128K
 
diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
new file mode 100755
index 0000000000..2a22546e1a
--- /dev/null
+++ b/tests/qemu-iotests/071
@@ -0,0 +1,239 @@
+#!/bin/bash
+#
+# Test case for the QMP blkdebug and blkverify interfaces
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+function do_run_qemu()
+{
+    echo Testing: "$@" | _filter_imgfmt
+    $QEMU -nographic -qmp stdio -serial none "$@"
+    echo
+}
+
+function run_qemu()
+{
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io
+}
+
+IMG_SIZE=64M
+
+echo
+echo "=== Testing blkverify through filename ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" IMGOPTS="" IMGFMT="raw" _make_test_img $IMG_SIZE |\
+    _filter_imgfmt
+_make_test_img $IMG_SIZE
+$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \
+         -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 512' | _filter_qemu_io
+
+$QEMU_IO -c 'write -P 42 0 512' "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \
+         -c 'read -P 42 0 512' | _filter_qemu_io
+
+echo
+echo "=== Testing blkverify through file blockref ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" IMGOPTS="" IMGFMT="raw" _make_test_img $IMG_SIZE |\
+    _filter_imgfmt
+_make_test_img $IMG_SIZE
+$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base,file.test.driver=$IMGFMT,file.test.file.filename=$TEST_IMG" \
+         -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 512' | _filter_qemu_io
+
+$QEMU_IO -c 'write -P 42 0 512' "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \
+         -c 'read -P 42 0 512' | _filter_qemu_io
+
+echo
+echo "=== Testing blkdebug through filename ==="
+echo
+
+$QEMU_IO -c "open -o file.driver=blkdebug,file.inject-error.event=l2_load $TEST_IMG" \
+         -c 'read -P 42 0x38000 512'
+
+echo
+echo "=== Testing blkdebug through file blockref ==="
+echo
+
+$QEMU_IO -c "open -o driver=$IMGFMT,file.driver=blkdebug,file.inject-error.event=l2_load,file.image.filename=$TEST_IMG" \
+         -c 'read -P 42 0x38000 512'
+
+echo
+echo "=== Testing blkdebug on existing block device ==="
+echo
+
+run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
+            "driver": "$IMGFMT",
+            "id": "drive0-debug",
+            "file": {
+                "driver": "blkdebug",
+                "image": "drive0",
+                "inject-error": [{
+                    "event": "l2_load"
+                }]
+            }
+        }
+    }
+}
+{ "execute": "human-monitor-command",
+    "arguments": {
+        "command-line": 'qemu-io drive0-debug "read 0 512"'
+    }
+}
+{ "execute": "quit" }
+EOF
+
+echo
+echo "=== Testing blkverify on existing block device ==="
+echo
+
+run_qemu -drive "file=$TEST_IMG,format=$IMGFMT,if=none,id=drive0" <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
+            "driver": "blkverify",
+            "id": "drive0-verify",
+            "test": "drive0",
+            "raw": {
+                "driver": "raw",
+                "file": {
+                    "driver": "file",
+                    "filename": "$TEST_IMG.base"
+                }
+            }
+        }
+    }
+}
+{ "execute": "human-monitor-command",
+    "arguments": {
+        "command-line": 'qemu-io drive0-verify "read 0 512"'
+    }
+}
+{ "execute": "quit" }
+EOF
+
+echo
+echo "=== Testing blkverify on existing raw block device ==="
+echo
+
+run_qemu -drive "file=$TEST_IMG.base,if=none,id=drive0" <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
+            "driver": "blkverify",
+            "id": "drive0-verify",
+            "test": {
+                "driver": "$IMGFMT",
+                "file": {
+                    "driver": "file",
+                    "filename": "$TEST_IMG"
+                }
+            },
+            "raw": "drive0"
+        }
+    }
+}
+{ "execute": "human-monitor-command",
+    "arguments": {
+        "command-line": 'qemu-io drive0-verify "read 0 512"'
+    }
+}
+{ "execute": "quit" }
+EOF
+
+echo
+echo "=== Testing blkdebug's set-state through QMP ==="
+echo
+
+run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF
+{ "execute": "qmp_capabilities" }
+{ "execute": "blockdev-add",
+    "arguments": {
+        "options": {
+            "driver": "$IMGFMT",
+            "id": "drive0-debug",
+            "file": {
+                "driver": "blkdebug",
+                "image": "drive0",
+                "inject-error": [{
+                    "event": "read_aio",
+                    "state": 42
+                }],
+                "set-state": [{
+                    "event": "write_aio",
+                    "new_state": 42
+                }]
+            }
+        }
+    }
+}
+{ "execute": "human-monitor-command",
+    "arguments": {
+        "command-line": 'qemu-io drive0-debug "read 0 512"'
+    }
+}
+{ "execute": "human-monitor-command",
+    "arguments": {
+        "command-line": 'qemu-io drive0-debug "write 0 512"'
+    }
+}
+{ "execute": "human-monitor-command",
+    "arguments": {
+        "command-line": 'qemu-io drive0-debug "read 0 512"'
+    }
+}
+{ "execute": "quit" }
+EOF
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
new file mode 100644
index 0000000000..5f840a9980
--- /dev/null
+++ b/tests/qemu-iotests/071.out
@@ -0,0 +1,90 @@
+QA output created by 071
+
+=== Testing blkverify through filename ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 229376
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 229376
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
+
+=== Testing blkverify through file blockref ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 229376
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 229376
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
+
+=== Testing blkdebug through filename ===
+
+read failed: Input/output error
+
+=== Testing blkdebug through file blockref ===
+
+read failed: Input/output error
+
+=== Testing blkdebug on existing block device ===
+
+Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+read failed: Input/output error
+{"return": ""}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
+
+
+=== Testing blkverify on existing block device ===
+
+Testing: -drive file=TEST_DIR/t.IMGFMT,format=IMGFMT,if=none,id=drive0
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+
+
+=== Testing blkverify on existing raw block device ===
+
+Testing: -drive file=TEST_DIR/t.IMGFMT.base,if=none,id=drive0
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0
+
+
+=== Testing blkdebug's set-state through QMP ===
+
+Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0
+QMP_VERSION
+{"return": {}}
+{"return": {}}
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+{"return": ""}
+read failed: Input/output error
+{"return": ""}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
+
+*** done
diff --git a/tests/qemu-iotests/072 b/tests/qemu-iotests/072
new file mode 100755
index 0000000000..a3876c2161
--- /dev/null
+++ b/tests/qemu-iotests/072
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Test case for nested image formats
+#
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow cow
+_supported_proto generic
+_supported_os Linux
+
+IMG_SIZE=64M
+
+echo
+echo "=== Testing nested image formats ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE
+
+$QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \
+         -c 'write -P 66 1024 512' "$TEST_IMG.base" | _filter_qemu_io
+
+$QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG"
+
+$QEMU_IO -c "open -o driver=$IMGFMT,file.driver=$IMGFMT,file.file.filename=$TEST_IMG" \
+         -c 'read -P 42 0 512' -c 'read -P 23 512 512' \
+         -c 'read -P 66 1024 512' | _filter_qemu_io
+
+# When not giving any format, qemu should open only one "layer". Therefore, this
+# should not work for any image formats with a header.
+$QEMU_IO -c 'read -P 42 0 512' "$TEST_IMG" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/072.out b/tests/qemu-iotests/072.out
new file mode 100644
index 0000000000..efe577c1c0
--- /dev/null
+++ b/tests/qemu-iotests/072.out
@@ -0,0 +1,21 @@
+QA output created by 072
+
+=== Testing nested image formats ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 1024
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 1024
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Pattern verification failed at offset 0, 512 bytes
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/077 b/tests/qemu-iotests/077
new file mode 100755
index 0000000000..bbf7b5145a
--- /dev/null
+++ b/tests/qemu-iotests/077
@@ -0,0 +1,278 @@
+#!/bin/bash
+#
+# Test concurrent pread/pwrite
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+
+CLUSTER_SIZE=4k
+size=128M
+
+_make_test_img $size
+
+echo
+echo "== Some concurrent requests involving RMW =="
+
+function test_io()
+{
+echo "open -o file.align=4k blkdebug::$TEST_IMG"
+# A simple RMW request
+cat  <<EOF
+aio_write -P 10 0x200 0x200
+aio_flush
+EOF
+
+# Sequential RMW requests on the same physical sector
+off=0x1000
+for ev in "head" "after_head" "tail" "after_tail"; do
+cat  <<EOF
+break pwritev_rmw.$ev A
+aio_write -P 10 $((off + 0x200)) 0x200
+wait_break A
+aio_write -P 11 $((off + 0x400)) 0x200
+sleep 100
+resume A
+aio_flush
+EOF
+off=$((off + 0x1000))
+done
+
+# Chained dependencies
+cat  <<EOF
+break pwritev_rmw.after_tail A
+aio_write -P 10 0x5000 0x200
+wait_break A
+aio_write -P 11 0x5200 0x200
+aio_write -P 12 0x5400 0x200
+aio_write -P 13 0x5600 0x200
+aio_write -P 14 0x5800 0x200
+aio_write -P 15 0x5a00 0x200
+aio_write -P 16 0x5c00 0x200
+aio_write -P 17 0x5e00 0x200
+sleep 100
+resume A
+aio_flush
+EOF
+
+# Overlapping multiple requests
+cat  <<EOF
+break pwritev_rmw.after_tail A
+aio_write -P 10 0x6000 0x200
+wait_break A
+break pwritev_rmw.after_head B
+aio_write -P 10 0x7e00 0x200
+wait_break B
+aio_write -P 11 0x6800 0x1000
+resume A
+sleep 100
+resume B
+aio_flush
+EOF
+
+cat  <<EOF
+break pwritev_rmw.after_tail A
+aio_write -P 10 0x8000 0x200
+wait_break A
+break pwritev_rmw.after_head B
+aio_write -P 10 0x9e00 0x200
+wait_break B
+aio_write -P 11 0x8800 0x1000
+resume B
+sleep 100
+resume A
+aio_flush
+EOF
+
+cat  <<EOF
+break pwritev_rmw.after_tail A
+aio_write -P 10 0xa000 0x200
+wait_break A
+aio_write -P 11 0xa800 0x1000
+break pwritev_rmw.after_head B
+aio_write -P 10 0xbe00 0x200
+wait_break B
+resume A
+sleep 100
+resume B
+aio_flush
+EOF
+
+cat  <<EOF
+break pwritev_rmw.after_tail A
+aio_write -P 10 0xc000 0x200
+wait_break A
+aio_write -P 11 0xc800 0x1000
+break pwritev_rmw.after_head B
+aio_write -P 10 0xde00 0x200
+wait_break B
+resume B
+sleep 100
+resume A
+aio_flush
+EOF
+
+# Only RMW for the tail part
+cat  <<EOF
+break pwritev_rmw.after_tail A
+aio_write -P 10 0xe000 0x1800
+wait_break A
+aio_write -P 11 0xf000 0xc00
+sleep 100
+resume A
+aio_flush
+EOF
+
+cat  <<EOF
+break pwritev A
+aio_write -P 10 0x10000 0x800
+wait_break A
+break pwritev_rmw.after_tail B
+aio_write -P 11 0x10000 0x400
+break pwritev_done C
+resume A
+wait_break C
+resume C
+sleep 100
+wait_break B
+resume B
+aio_flush
+EOF
+
+cat  <<EOF
+break pwritev A
+aio_write -P 10 0x11000 0x800
+wait_break A
+aio_write -P 11 0x11000 0x1000
+sleep 100
+resume A
+aio_flush
+EOF
+}
+
+test_io | $QEMU_IO  | _filter_qemu_io | \
+    sed -e 's,[0-9/]* bytes at offset [0-9]*,XXX/XXX bytes at offset XXX,g' \
+        -e 's/^[0-9]* \(bytes\|KiB\)/XXX bytes/' \
+        -e '/Suspended/d'
+
+echo
+echo "== Verify image content =="
+
+function verify_io()
+{
+    # A simple RMW request
+    echo read -P 0       0 0x200
+    echo read -P 10  0x200 0x200
+    echo read -P 0   0x400 0xc00
+
+    # Sequential RMW requests on the same physical sector
+    echo read -P 0  0x1000 0x200
+    echo read -P 10 0x1200 0x200
+    echo read -P 11 0x1400 0x200
+    echo read -P 0  0x1600 0xa00
+
+    echo read -P 0  0x2000 0x200
+    echo read -P 10 0x2200 0x200
+    echo read -P 11 0x2400 0x200
+    echo read -P 0  0x2600 0xa00
+
+    echo read -P 0  0x3000 0x200
+    echo read -P 10 0x3200 0x200
+    echo read -P 11 0x3400 0x200
+    echo read -P 0  0x3600 0xa00
+
+    echo read -P 0  0x4000 0x200
+    echo read -P 10 0x4200 0x200
+    echo read -P 11 0x4400 0x200
+    echo read -P 0  0x4600 0xa00
+
+    # Chained dependencies
+    echo read -P 10 0x5000 0x200
+    echo read -P 11 0x5200 0x200
+    echo read -P 12 0x5400 0x200
+    echo read -P 13 0x5600 0x200
+    echo read -P 14 0x5800 0x200
+    echo read -P 15 0x5a00 0x200
+    echo read -P 16 0x5c00 0x200
+    echo read -P 17 0x5e00 0x200
+
+    # Overlapping multiple requests
+    echo read -P 10 0x6000 0x200
+    echo read -P  0 0x6200 0x600
+    echo read -P 11 0x6800 0x1000
+    echo read -P  0 0x7800 0x600
+    echo read -P 10 0x7e00 0x200
+
+    echo read -P 10 0x8000 0x200
+    echo read -P  0 0x8200 0x600
+    echo read -P 11 0x8800 0x1000
+    echo read -P  0 0x9800 0x600
+    echo read -P 10 0x9e00 0x200
+
+    echo read -P 10 0xa000 0x200
+    echo read -P  0 0xa200 0x600
+    echo read -P 11 0xa800 0x1000
+    echo read -P  0 0xb800 0x600
+    echo read -P 10 0xbe00 0x200
+
+    echo read -P 10 0xc000 0x200
+    echo read -P  0 0xc200 0x600
+    echo read -P 11 0xc800 0x1000
+    echo read -P  0 0xd800 0x600
+    echo read -P 10 0xde00 0x200
+
+    # Only RMW for the tail part
+    echo read -P 10 0xe000 0x1000
+    echo read -P 11 0xf800 0x400
+    echo read -P  0 0xfc00 0x400
+
+    echo read -P 11 0x10000 0x400
+    echo read -P 10 0x10400 0x400
+
+    echo read -P 11 0x11800 0x800
+}
+
+verify_io | $QEMU_IO "$TEST_IMG" | _filter_qemu_io
+
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/077.out b/tests/qemu-iotests/077.out
new file mode 100644
index 0000000000..ab612344d6
--- /dev/null
+++ b/tests/qemu-iotests/077.out
@@ -0,0 +1,202 @@
+QA output created by 077
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+
+== Some concurrent requests involving RMW ==
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'B'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'B'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'B'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'B'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+blkdebug: Resuming request 'C'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'B'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+blkdebug: Resuming request 'A'
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote XXX/XXX bytes at offset XXX
+XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== Verify image content ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3072/3072 bytes at offset 1024
+3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 4096
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 4608
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 5120
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2560/2560 bytes at offset 5632
+2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 8192
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 8704
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 9216
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2560/2560 bytes at offset 9728
+2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 12288
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 12800
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 13312
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2560/2560 bytes at offset 13824
+2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 16384
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 16896
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 17408
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2560/2560 bytes at offset 17920
+2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 20480
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 20992
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 21504
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 22016
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 22528
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 23040
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 23552
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 24064
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 24576
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 25088
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 26624
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 30720
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 32256
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 32768
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 33280
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 34816
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 38912
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 40448
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 40960
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 41472
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 43008
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 47104
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 48640
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 49152
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 49664
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 51200
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1536/1536 bytes at offset 55296
+1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 56832
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 4096/4096 bytes at offset 57344
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 63488
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 64512
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 65536
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 66560
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2048/2048 bytes at offset 71680
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 28ba0d9ad5..0f68156400 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -170,6 +170,17 @@ _make_test_img()
     fi
 }
 
+_rm_test_img()
+{
+    local img=$1
+    if [ "$IMGFMT" = "vmdk" ]; then
+        # Remove all the extents for vmdk
+        $QEMU_IMG info $img 2>/dev/null | grep 'filename:' | cut -f 2 -d: \
+            | xargs -I {} rm -f "{}"
+    fi
+    rm -f $img
+}
+
 _cleanup_test_img()
 {
     case "$IMGPROTO" in
@@ -179,9 +190,9 @@ _cleanup_test_img()
             rm -f "$TEST_IMG_FILE"
             ;;
         file)
-            rm -f "$TEST_DIR/t.$IMGFMT"
-            rm -f "$TEST_DIR/t.$IMGFMT.orig"
-            rm -f "$TEST_DIR/t.$IMGFMT.base"
+            _rm_test_img "$TEST_DIR/t.$IMGFMT"
+            _rm_test_img "$TEST_DIR/t.$IMGFMT.orig"
+            _rm_test_img "$TEST_DIR/t.$IMGFMT.base"
             if [ -n "$SAMPLE_IMG_FILE" ]
             then
                 rm -f "$TEST_DIR/$SAMPLE_IMG_FILE"
@@ -406,6 +417,17 @@ _default_cache_mode()
     fi
 }
 
+_unsupported_imgopts()
+{
+    for bad_opt
+    do
+        if echo "$IMGOPTS" | grep -q 2>/dev/null "$bad_opt"
+        then
+            _notrun "not suitable for image option: $bad_opt"
+        fi
+    done
+}
+
 # this test requires that a specified command (executable) exists
 #
 _require_command()
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index cc750c986e..03c762fb4f 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -77,5 +77,8 @@
 068 rw auto
 069 rw auto
 070 rw auto
+071 rw auto
+072 rw auto
 073 rw auto
 074 rw auto
+077 rw auto
diff --git a/trace-events b/trace-events
index 9f4456a82e..1b668d1ac2 100644
--- a/trace-events
+++ b/trace-events
@@ -402,6 +402,7 @@ usb_desc_config(int addr, int index, int len, int ret) "dev %d query config %d,
 usb_desc_other_speed_config(int addr, int index, int len, int ret) "dev %d query config %d, len %d, ret %d"
 usb_desc_string(int addr, int index, int len, int ret) "dev %d query string %d, len %d, ret %d"
 usb_desc_bos(int addr, int len, int ret) "dev %d bos, len %d, ret %d"
+usb_desc_msos(int addr, int index, int len, int ret) "dev %d msos, index 0x%x, len %d, ret %d"
 usb_set_addr(int addr) "dev %d"
 usb_set_config(int addr, int config, int ret) "dev %d, config %d, ret %d"
 usb_set_interface(int addr, int iface, int alt, int ret) "dev %d, interface %d, altsetting %d, ret %d"
diff --git a/ui/gtk.c b/ui/gtk.c
index 6316f5ba00..a633d89346 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -34,6 +34,10 @@
 #define GETTEXT_PACKAGE "qemu"
 #define LOCALEDIR "po"
 
+#ifdef _WIN32
+# define _WIN32_WINNT 0x0601 /* needed to get definition of MAPVK_VK_TO_VSC */
+#endif
+
 #include "qemu-common.h"
 
 #ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
@@ -704,11 +708,18 @@ static gboolean gd_button_event(GtkWidget *widget, GdkEventButton *button,
 static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque)
 {
     GtkDisplayState *s = opaque;
-    int gdk_keycode;
-    int qemu_keycode;
+    int gdk_keycode = key->hardware_keycode;
     int i;
 
-    gdk_keycode = key->hardware_keycode;
+#ifdef _WIN32
+    UINT qemu_keycode = MapVirtualKey(gdk_keycode, MAPVK_VK_TO_VSC);
+    switch (qemu_keycode) {
+    case 103:   /* alt gr */
+        qemu_keycode = 56 | SCANCODE_GREY;
+        break;
+    }
+#else
+    int qemu_keycode;
 
     if (gdk_keycode < 9) {
         qemu_keycode = 0;
@@ -723,6 +734,7 @@ static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque)
     } else {
         qemu_keycode = 0;
     }
+#endif
 
     trace_gd_key_event(gdk_keycode, qemu_keycode,
                        (key->type == GDK_KEY_PRESS) ? "down" : "up");
diff --git a/util/Makefile.objs b/util/Makefile.objs
index af3e5cb157..937376b082 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -13,3 +13,4 @@ util-obj-y += hexdump.o
 util-obj-y += crc32c.o
 util-obj-y += throttle.o
 util-obj-y += getauxval.o
+util-obj-y += readline.o
diff --git a/util/error.c b/util/error.c
index e5de34f3bb..f11f1d57a0 100644
--- a/util/error.c
+++ b/util/error.c
@@ -44,7 +44,7 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
     err->err_class = err_class;
 
     if (errp == &error_abort) {
-        fprintf(stderr, "%s\n", error_get_pretty(err));
+        error_report("%s", error_get_pretty(err));
         abort();
     }
 
@@ -80,7 +80,7 @@ void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
     err->err_class = err_class;
 
     if (errp == &error_abort) {
-        fprintf(stderr, "%s\n", error_get_pretty(err));
+        error_report("%s", error_get_pretty(err));
         abort();
     }
 
@@ -125,7 +125,7 @@ void error_set_win32(Error **errp, int win32_err, ErrorClass err_class,
     err->err_class = err_class;
 
     if (errp == &error_abort) {
-        fprintf(stderr, "%s\n", error_get_pretty(err));
+        error_report("%s", error_get_pretty(err));
         abort();
     }
 
@@ -171,7 +171,7 @@ void error_free(Error *err)
 void error_propagate(Error **dst_err, Error *local_err)
 {
     if (local_err && dst_err == &error_abort) {
-        fprintf(stderr, "%s\n", error_get_pretty(local_err));
+        error_report("%s", error_get_pretty(local_err));
         abort();
     } else if (dst_err && !*dst_err) {
         *dst_err = local_err;
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index e00a44c86f..d5dca4729a 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -47,6 +47,9 @@ extern int daemon(int, int);
 #  define QEMU_VMALLOC_ALIGN getpagesize()
 #endif
 
+#include <termios.h>
+#include <unistd.h>
+
 #include <glib/gprintf.h>
 
 #include "config-host.h"
@@ -85,6 +88,11 @@ void *qemu_oom_check(void *ptr)
 void *qemu_memalign(size_t alignment, size_t size)
 {
     void *ptr;
+
+    if (alignment < sizeof(void*)) {
+        alignment = sizeof(void*);
+    }
+
 #if defined(_POSIX_C_SOURCE) && !defined(__sun__)
     int ret;
     ret = posix_memalign(&ptr, alignment, size);
@@ -251,3 +259,18 @@ qemu_get_local_state_pathname(const char *relative_pathname)
     return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
                            relative_pathname);
 }
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+    struct termios tty;
+
+    tcgetattr(fd, &tty);
+
+    if (echo) {
+        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
+    } else {
+        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+    }
+
+    tcsetattr(fd, TCSANOW, &tty);
+}
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 776ccfaaf0..50be0440f2 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -189,3 +189,22 @@ qemu_get_local_state_pathname(const char *relative_pathname)
     return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path,
                            relative_pathname);
 }
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+    HANDLE handle = (HANDLE)_get_osfhandle(fd);
+    DWORD dwMode = 0;
+
+    if (handle == INVALID_HANDLE_VALUE) {
+        return;
+    }
+
+    GetConsoleMode(handle, &dwMode);
+
+    if (echo) {
+        SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT);
+    } else {
+        SetConsoleMode(handle,
+                       dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT));
+    }
+}
diff --git a/util/qemu-config.c b/util/qemu-config.c
index 7973659518..9298f55ecf 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -356,3 +356,103 @@ int qemu_read_config_file(const char *filename)
         return -EINVAL;
     }
 }
+
+static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
+                                       Error **errp)
+{
+    QemuOpts *subopts;
+    QDict *subqdict;
+    QList *list = NULL;
+    Error *local_err = NULL;
+    size_t orig_size, enum_size;
+    char *prefix;
+
+    prefix = g_strdup_printf("%s.", opts->name);
+    qdict_extract_subqdict(options, &subqdict, prefix);
+    g_free(prefix);
+    orig_size = qdict_size(subqdict);
+    if (!orig_size) {
+        goto out;
+    }
+
+    subopts = qemu_opts_create(opts, NULL, 0, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
+
+    qemu_opts_absorb_qdict(subopts, subqdict, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
+
+    enum_size = qdict_size(subqdict);
+    if (enum_size < orig_size && enum_size) {
+        error_setg(errp, "Unknown option '%s' for [%s]",
+                   qdict_first(subqdict)->key, opts->name);
+        goto out;
+    }
+
+    if (enum_size) {
+        /* Multiple, enumerated sections */
+        QListEntry *list_entry;
+        unsigned i = 0;
+
+        /* Not required anymore */
+        qemu_opts_del(subopts);
+
+        qdict_array_split(subqdict, &list);
+        if (qdict_size(subqdict)) {
+            error_setg(errp, "Unused option '%s' for [%s]",
+                       qdict_first(subqdict)->key, opts->name);
+            goto out;
+        }
+
+        QLIST_FOREACH_ENTRY(list, list_entry) {
+            QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry));
+            char *opt_name;
+
+            opt_name = g_strdup_printf("%s.%u", opts->name, i++);
+            subopts = qemu_opts_create(opts, opt_name, 1, &local_err);
+            g_free(opt_name);
+            if (error_is_set(&local_err)) {
+                error_propagate(errp, local_err);
+                goto out;
+            }
+
+            qemu_opts_absorb_qdict(subopts, section, &local_err);
+            if (error_is_set(&local_err)) {
+                error_propagate(errp, local_err);
+                qemu_opts_del(subopts);
+                goto out;
+            }
+
+            if (qdict_size(section)) {
+                error_setg(errp, "[%s] section doesn't support the option '%s'",
+                           opts->name, qdict_first(section)->key);
+                qemu_opts_del(subopts);
+                goto out;
+            }
+        }
+    }
+
+out:
+    QDECREF(subqdict);
+    QDECREF(list);
+}
+
+void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+                             Error **errp)
+{
+    int i;
+    Error *local_err = NULL;
+
+    for (i = 0; lists[i]; i++) {
+        config_parse_qdict_section(options, lists[i], &local_err);
+        if (error_is_set(&local_err)) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
diff --git a/util/qemu-progress.c b/util/qemu-progress.c
index 9a3f96cd47..4ee5cd07f2 100644
--- a/util/qemu-progress.c
+++ b/util/qemu-progress.c
@@ -24,7 +24,6 @@
 
 #include "qemu-common.h"
 #include "qemu/osdep.h"
-#include "sysemu/sysemu.h"
 #include <stdio.h>
 
 struct progress_state {
@@ -83,12 +82,22 @@ static void progress_dummy_init(void)
 {
 #ifdef CONFIG_POSIX
     struct sigaction action;
+    sigset_t set;
 
     memset(&action, 0, sizeof(action));
     sigfillset(&action.sa_mask);
     action.sa_handler = sigusr_print;
     action.sa_flags = 0;
     sigaction(SIGUSR1, &action, NULL);
+
+    /*
+     * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the
+     * tools that use the progress report SIGUSR1 isn't used in this meaning
+     * and instead should print the progress, so reenable it.
+     */
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR1);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
 #endif
 
     state.print = progress_dummy_print;
diff --git a/readline.c b/util/readline.c
index abf27ddec3..8441be484c 100644
--- a/readline.c
+++ b/util/readline.c
@@ -21,21 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "monitor/readline.h"
-#include "monitor/monitor.h"
+
+#include "qemu-common.h"
+#include "qemu/readline.h"
 
 #define IS_NORM 0
 #define IS_ESC  1
 #define IS_CSI  2
 #define IS_SS3  3
 
-#undef printf
-#define printf do_not_use_printf
-
 void readline_show_prompt(ReadLineState *rs)
 {
-    monitor_printf(rs->mon, "%s", rs->prompt);
-    monitor_flush(rs->mon);
+    rs->printf_func(rs->opaque, "%s", rs->prompt);
+    rs->flush_func(rs->opaque);
     rs->last_cmd_buf_index = 0;
     rs->last_cmd_buf_size = 0;
     rs->esc_state = IS_NORM;
@@ -49,17 +47,17 @@ static void readline_update(ReadLineState *rs)
     if (rs->cmd_buf_size != rs->last_cmd_buf_size ||
         memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) {
         for(i = 0; i < rs->last_cmd_buf_index; i++) {
-            monitor_printf(rs->mon, "\033[D");
+            rs->printf_func(rs->opaque, "\033[D");
         }
         rs->cmd_buf[rs->cmd_buf_size] = '\0';
         if (rs->read_password) {
             len = strlen(rs->cmd_buf);
             for(i = 0; i < len; i++)
-                monitor_printf(rs->mon, "*");
+                rs->printf_func(rs->opaque, "*");
         } else {
-            monitor_printf(rs->mon, "%s", rs->cmd_buf);
+            rs->printf_func(rs->opaque, "%s", rs->cmd_buf);
         }
-        monitor_printf(rs->mon, "\033[K");
+        rs->printf_func(rs->opaque, "\033[K");
         memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size);
         rs->last_cmd_buf_size = rs->cmd_buf_size;
         rs->last_cmd_buf_index = rs->cmd_buf_size;
@@ -68,17 +66,17 @@ static void readline_update(ReadLineState *rs)
         delta = rs->cmd_buf_index - rs->last_cmd_buf_index;
         if (delta > 0) {
             for(i = 0;i < delta; i++) {
-                monitor_printf(rs->mon, "\033[C");
+                rs->printf_func(rs->opaque, "\033[C");
             }
         } else {
             delta = -delta;
             for(i = 0;i < delta; i++) {
-                monitor_printf(rs->mon, "\033[D");
+                rs->printf_func(rs->opaque, "\033[D");
             }
         }
         rs->last_cmd_buf_index = rs->cmd_buf_index;
     }
-    monitor_flush(rs->mon);
+    rs->flush_func(rs->opaque);
 }
 
 static void readline_insert_char(ReadLineState *rs, int ch)
@@ -284,7 +282,7 @@ static void readline_completion(ReadLineState *rs)
     cmdline = g_malloc(rs->cmd_buf_index + 1);
     memcpy(cmdline, rs->cmd_buf, rs->cmd_buf_index);
     cmdline[rs->cmd_buf_index] = '\0';
-    rs->completion_finder(rs->mon, cmdline);
+    rs->completion_finder(rs->opaque, cmdline);
     g_free(cmdline);
 
     /* no completion found */
@@ -299,7 +297,7 @@ static void readline_completion(ReadLineState *rs)
         if (len > 0 && rs->completions[0][len - 1] != '/')
             readline_insert_char(rs, ' ');
     } else {
-        monitor_printf(rs->mon, "\n");
+        rs->printf_func(rs->opaque, "\n");
         max_width = 0;
         max_prefix = 0;	
         for(i = 0; i < rs->nb_completions; i++) {
@@ -329,9 +327,9 @@ static void readline_completion(ReadLineState *rs)
         nb_cols = 80 / max_width;
         j = 0;
         for(i = 0; i < rs->nb_completions; i++) {
-            monitor_printf(rs->mon, "%-*s", max_width, rs->completions[i]);
+            rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]);
             if (++j == nb_cols || i == (rs->nb_completions - 1)) {
-                monitor_printf(rs->mon, "\n");
+                rs->printf_func(rs->opaque, "\n");
                 j = 0;
             }
         }
@@ -365,12 +363,12 @@ void readline_handle_byte(ReadLineState *rs, int ch)
             rs->cmd_buf[rs->cmd_buf_size] = '\0';
             if (!rs->read_password)
                 readline_hist_add(rs, rs->cmd_buf);
-            monitor_printf(rs->mon, "\n");
+            rs->printf_func(rs->opaque, "\n");
             rs->cmd_buf_index = 0;
             rs->cmd_buf_size = 0;
             rs->last_cmd_buf_index = 0;
             rs->last_cmd_buf_size = 0;
-            rs->readline_func(rs->mon, rs->cmd_buf, rs->readline_opaque);
+            rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque);
             break;
         case 23:
             /* ^W */
@@ -480,13 +478,17 @@ const char *readline_get_history(ReadLineState *rs, unsigned int index)
     return rs->history[index];
 }
 
-ReadLineState *readline_init(Monitor *mon,
+ReadLineState *readline_init(ReadLinePrintfFunc *printf_func,
+                             ReadLineFlushFunc *flush_func,
+                             void *opaque,
                              ReadLineCompletionFunc *completion_finder)
 {
     ReadLineState *rs = g_malloc0(sizeof(*rs));
 
     rs->hist_entry = -1;
-    rs->mon = mon;
+    rs->opaque = opaque;
+    rs->printf_func = printf_func;
+    rs->flush_func = flush_func;
     rs->completion_finder = completion_finder;
 
     return rs;