summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--block.c396
-rw-r--r--block/Makefile.objs2
-rw-r--r--block/backup.c11
-rw-r--r--block/block-backend.c23
-rw-r--r--block/dirty-bitmap.c387
-rw-r--r--block/parallels.c24
-rw-r--r--block/qapi.c178
-rw-r--r--block/qcow.c24
-rw-r--r--block/qcow2.c61
-rw-r--r--block/qed.c28
-rw-r--r--block/quorum.c50
-rw-r--r--block/sheepdog.c41
-rw-r--r--block/vdi.c22
-rw-r--r--block/vhdx.c28
-rw-r--r--block/vmdk.c121
-rw-r--r--block/vpc.c165
-rw-r--r--blockdev.c60
-rw-r--r--device-hotplug.c7
-rw-r--r--docs/qmp-events.txt11
-rw-r--r--hmp-commands.hx4
-rw-r--r--include/block/block.h41
-rw-r--r--include/block/block_int.h2
-rw-r--r--include/block/dirty-bitmap.h44
-rw-r--r--include/qemu/typedefs.h1
-rw-r--r--include/sysemu/block-backend.h1
-rw-r--r--monitor.c22
-rw-r--r--qapi/block.json16
-rw-r--r--qapi/event.json4
-rw-r--r--qemu-img.c9
-rw-r--r--target-i386/cpu.c4
-rw-r--r--target-i386/kvm.c1
-rw-r--r--target-i386/translate.c284
-rw-r--r--tests/qemu-iotests/081.out2
-rwxr-xr-xtests/qemu-iotests/146165
-rw-r--r--tests/qemu-iotests/146.out70
-rw-r--r--tests/qemu-iotests/148129
-rw-r--r--tests/qemu-iotests/148.out5
-rw-r--r--tests/qemu-iotests/group2
-rw-r--r--tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2bin0 -> 1021 bytes
-rw-r--r--tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2bin0 -> 214 bytes
-rw-r--r--tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2bin0 -> 212 bytes
41 files changed, 1619 insertions, 826 deletions
diff --git a/block.c b/block.c
index ba24b8e674..59a18a3a66 100644
--- a/block.c
+++ b/block.c
@@ -53,23 +53,6 @@
 #include <windows.h>
 #endif
 
-/**
- * A BdrvDirtyBitmap can be in three possible states:
- * (1) successor is NULL and disabled is false: full r/w mode
- * (2) successor is NULL and disabled is true: read only mode ("disabled")
- * (3) successor is set: frozen mode.
- *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
- *     or enabled. A frozen bitmap can only abdicate() or reclaim().
- */
-struct BdrvDirtyBitmap {
-    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
-    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
-    char *name;                 /* Optional non-empty unique ID */
-    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is read-only */
-    QLIST_ENTRY(BdrvDirtyBitmap) list;
-};
-
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
 struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -88,9 +71,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
                              BlockDriverState *parent,
                              const BdrvChildRole *child_role, Error **errp);
 
-static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
-static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -687,13 +667,19 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
 }
 
 /*
- * Returns the flags that a temporary snapshot should get, based on the
- * originally requested flags (the originally requested image will have flags
- * like a backing file)
+ * Returns the options and flags that a temporary snapshot should get, based on
+ * the originally requested flags (the originally requested image will have
+ * flags like a backing file)
  */
-static int bdrv_temp_snapshot_flags(int flags)
+static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
+                                       int parent_flags, QDict *parent_options)
 {
-    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+    *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+
+    /* For temporary files, unconditional cache=unsafe is fine */
+    qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
+    qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
+    qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
 }
 
 /*
@@ -1424,13 +1410,13 @@ done:
     return c;
 }
 
-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
+                                     QDict *snapshot_options, Error **errp)
 {
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char *tmp_filename = g_malloc0(PATH_MAX + 1);
     int64_t total_size;
     QemuOpts *opts = NULL;
-    QDict *snapshot_options;
     BlockDriverState *bs_snapshot;
     Error *local_err = NULL;
     int ret;
@@ -1464,8 +1450,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
         goto out;
     }
 
-    /* Prepare a new options QDict for the temporary file */
-    snapshot_options = qdict_new();
+    /* Prepare options QDict for the temporary file */
     qdict_put(snapshot_options, "file.driver",
               qstring_from_str("file"));
     qdict_put(snapshot_options, "file.filename",
@@ -1477,6 +1462,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
 
     ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                     flags, &local_err);
+    snapshot_options = NULL;
     if (ret < 0) {
         error_propagate(errp, local_err);
         goto out;
@@ -1485,6 +1471,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
     bdrv_append(bs_snapshot, bs);
 
 out:
+    QDECREF(snapshot_options);
     g_free(tmp_filename);
     return ret;
 }
@@ -1516,6 +1503,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     const char *drvname;
     const char *backing;
     Error *local_err = NULL;
+    QDict *snapshot_options = NULL;
     int snapshot_flags = 0;
 
     assert(pbs);
@@ -1607,7 +1595,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
             flags |= BDRV_O_ALLOW_RDWR;
         }
         if (flags & BDRV_O_SNAPSHOT) {
-            snapshot_flags = bdrv_temp_snapshot_flags(flags);
+            snapshot_options = qdict_new();
+            bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
+                                       flags, options);
             bdrv_backing_options(&flags, options, flags, options);
         }
 
@@ -1709,7 +1699,9 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
      * temporary snapshot afterwards. */
     if (snapshot_flags) {
-        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
+        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
+                                        &local_err);
+        snapshot_options = NULL;
         if (local_err) {
             goto close_and_fail;
         }
@@ -1721,6 +1713,7 @@ fail:
     if (file != NULL) {
         bdrv_unref_child(bs, file);
     }
+    QDECREF(snapshot_options);
     QDECREF(bs->explicit_options);
     QDECREF(bs->options);
     QDECREF(options);
@@ -1743,6 +1736,7 @@ close_and_fail:
     } else {
         bdrv_unref(bs);
     }
+    QDECREF(snapshot_options);
     QDECREF(options);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -3431,346 +3425,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
     }
 }
 
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
-{
-    BdrvDirtyBitmap *bm;
-
-    assert(name);
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->name && !strcmp(name, bm->name)) {
-            return bm;
-        }
-    }
-    return NULL;
-}
-
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    g_free(bitmap->name);
-    bitmap->name = NULL;
-}
-
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
-                                          uint32_t granularity,
-                                          const char *name,
-                                          Error **errp)
-{
-    int64_t bitmap_size;
-    BdrvDirtyBitmap *bitmap;
-    uint32_t sector_granularity;
-
-    assert((granularity & (granularity - 1)) == 0);
-
-    if (name && bdrv_find_dirty_bitmap(bs, name)) {
-        error_setg(errp, "Bitmap already exists: %s", name);
-        return NULL;
-    }
-    sector_granularity = granularity >> BDRV_SECTOR_BITS;
-    assert(sector_granularity);
-    bitmap_size = bdrv_nb_sectors(bs);
-    if (bitmap_size < 0) {
-        error_setg_errno(errp, -bitmap_size, "could not get length of device");
-        errno = -bitmap_size;
-        return NULL;
-    }
-    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
-    bitmap->size = bitmap_size;
-    bitmap->name = g_strdup(name);
-    bitmap->disabled = false;
-    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    return bitmap;
-}
-
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->successor;
-}
-
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
-{
-    return !(bitmap->disabled || bitmap->successor);
-}
-
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
-{
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        return DIRTY_BITMAP_STATUS_FROZEN;
-    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-        return DIRTY_BITMAP_STATUS_DISABLED;
-    } else {
-        return DIRTY_BITMAP_STATUS_ACTIVE;
-    }
-}
-
-/**
- * Create a successor bitmap destined to replace this bitmap after an operation.
- * Requires that the bitmap is not frozen and has no successor.
- */
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
-                                       BdrvDirtyBitmap *bitmap, Error **errp)
-{
-    uint64_t granularity;
-    BdrvDirtyBitmap *child;
-
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        error_setg(errp, "Cannot create a successor for a bitmap that is "
-                   "currently frozen");
-        return -1;
-    }
-    assert(!bitmap->successor);
-
-    /* Create an anonymous successor */
-    granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
-    if (!child) {
-        return -1;
-    }
-
-    /* Successor will be on or off based on our current state. */
-    child->disabled = bitmap->disabled;
-
-    /* Install the successor and freeze the parent */
-    bitmap->successor = child;
-    return 0;
-}
-
-/**
- * For a bitmap with a successor, yield our name to the successor,
- * delete the old bitmap, and return a handle to the new bitmap.
- */
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
-                                            BdrvDirtyBitmap *bitmap,
-                                            Error **errp)
-{
-    char *name;
-    BdrvDirtyBitmap *successor = bitmap->successor;
-
-    if (successor == NULL) {
-        error_setg(errp, "Cannot relinquish control if "
-                   "there's no successor present");
-        return NULL;
-    }
-
-    name = bitmap->name;
-    bitmap->name = NULL;
-    successor->name = name;
-    bitmap->successor = NULL;
-    bdrv_release_dirty_bitmap(bs, bitmap);
-
-    return successor;
-}
-
-/**
- * In cases of failure where we can no longer safely delete the parent,
- * we may wish to re-join the parent and child/successor.
- * The merged parent will be un-frozen, but not explicitly re-enabled.
- */
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
-                                           BdrvDirtyBitmap *parent,
-                                           Error **errp)
-{
-    BdrvDirtyBitmap *successor = parent->successor;
-
-    if (!successor) {
-        error_setg(errp, "Cannot reclaim a successor when none is present");
-        return NULL;
-    }
-
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
-        error_setg(errp, "Merging of parent and successor bitmap failed");
-        return NULL;
-    }
-    bdrv_release_dirty_bitmap(bs, successor);
-    parent->successor = NULL;
-
-    return parent;
-}
-
-/**
- * Truncates _all_ bitmaps attached to a BDS.
- */
-static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bitmap;
-    uint64_t size = bdrv_nb_sectors(bs);
-
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        assert(!bdrv_dirty_bitmap_frozen(bitmap));
-        hbitmap_truncate(bitmap->bitmap, size);
-        bitmap->size = size;
-    }
-}
-
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-                                                  BdrvDirtyBitmap *bitmap,
-                                                  bool only_named)
-{
-    BdrvDirtyBitmap *bm, *next;
-    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
-            assert(!bdrv_dirty_bitmap_frozen(bm));
-            QLIST_REMOVE(bm, list);
-            hbitmap_free(bm->bitmap);
-            g_free(bm->name);
-            g_free(bm);
-
-            if (bitmap) {
-                return;
-            }
-        }
-    }
-}
-
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
-}
-
-/**
- * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
- * There must not be any frozen bitmaps attached.
- */
-static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
-}
-
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = true;
-}
-
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = false;
-}
-
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm;
-    BlockDirtyInfoList *list = NULL;
-    BlockDirtyInfoList **plist = &list;
-
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
-        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bm);
-        info->granularity = bdrv_dirty_bitmap_granularity(bm);
-        info->has_name = !!bm->name;
-        info->name = g_strdup(bm->name);
-        info->status = bdrv_dirty_bitmap_status(bm);
-        entry->value = info;
-        *plist = entry;
-        plist = &entry->next;
-    }
-
-    return list;
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
-{
-    if (bitmap) {
-        return hbitmap_get(bitmap->bitmap, sector);
-    } else {
-        return 0;
-    }
-}
-
-/**
- * Chooses a default granularity based on the existing cluster size,
- * but clamped between [4K, 64K]. Defaults to 64K in the case that there
- * is no cluster size information available.
- */
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
-{
-    BlockDriverInfo bdi;
-    uint32_t granularity;
-
-    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
-        granularity = MAX(4096, bdi.cluster_size);
-        granularity = MIN(65536, granularity);
-    } else {
-        granularity = 65536;
-    }
-
-    return granularity;
-}
-
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
-{
-    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
-}
-
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
-{
-    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
-}
-
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    if (!out) {
-        hbitmap_reset_all(bitmap->bitmap);
-    } else {
-        HBitmap *backup = bitmap->bitmap;
-        bitmap->bitmap = hbitmap_alloc(bitmap->size,
-                                       hbitmap_granularity(backup));
-        *out = backup;
-    }
-}
-
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
-{
-    HBitmap *tmp = bitmap->bitmap;
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    bitmap->bitmap = in;
-    hbitmap_free(tmp);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
-{
-    BdrvDirtyBitmap *bitmap;
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-            continue;
-        }
-        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-    }
-}
-
-/**
- * Advance an HBitmapIter to an arbitrary offset.
- */
-void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
-{
-    assert(hbi->hb);
-    hbitmap_iter_init(hbi, hbi->hb, offset);
-}
-
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
-{
-    return hbitmap_count(bitmap->bitmap);
-}
-
 /* Get a reference to bs */
 void bdrv_ref(BlockDriverState *bs)
 {
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 58ef2ef3f2..cdd865597a 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -20,7 +20,7 @@ block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
+block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 
 common-obj-y += stream.o
diff --git a/block/backup.c b/block/backup.c
index 0f1b1bc084..ab3e345e92 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -20,6 +20,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
 
 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 100000000ULL /* ns */
@@ -42,7 +43,7 @@ typedef struct BackupBlockJob {
     BlockdevOnError on_target_error;
     CoRwlock flush_rwlock;
     uint64_t sectors_read;
-    HBitmap *bitmap;
+    unsigned long *done_bitmap;
     int64_t cluster_size;
     QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;
@@ -116,7 +117,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
     cow_request_begin(&cow_request, job, start, end);
 
     for (; start < end; start++) {
-        if (hbitmap_get(job->bitmap, start)) {
+        if (test_bit(start, job->done_bitmap)) {
             trace_backup_do_cow_skip(job, start);
             continue; /* already copied */
         }
@@ -167,7 +168,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
             goto out;
         }
 
-        hbitmap_set(job->bitmap, start, 1);
+        set_bit(start, job->done_bitmap);
 
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
@@ -399,7 +400,7 @@ static void coroutine_fn backup_run(void *opaque)
     start = 0;
     end = DIV_ROUND_UP(job->common.len, job->cluster_size);
 
-    job->bitmap = hbitmap_alloc(end, 0);
+    job->done_bitmap = bitmap_new(end);
 
     bdrv_set_enable_write_cache(target, true);
     if (target->blk) {
@@ -480,7 +481,7 @@ static void coroutine_fn backup_run(void *opaque)
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
-    hbitmap_free(job->bitmap);
+    g_free(job->done_bitmap);
 
     if (target->blk) {
         blk_iostatus_disable(target->blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index ebdf78a11c..03e71b4368 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -50,6 +50,8 @@ struct BlockBackend {
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
 
+    bool allow_write_beyond_eof;
+
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
 };
 
@@ -579,6 +581,11 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
     }
 }
 
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+{
+    blk->allow_write_beyond_eof = allow;
+}
+
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                   size_t size)
 {
@@ -592,17 +599,19 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
         return -ENOMEDIUM;
     }
 
-    len = blk_getlength(blk);
-    if (len < 0) {
-        return len;
-    }
-
     if (offset < 0) {
         return -EIO;
     }
 
-    if (offset > len || len - offset < size) {
-        return -EIO;
+    if (!blk->allow_write_beyond_eof) {
+        len = blk_getlength(blk);
+        if (len < 0) {
+            return len;
+        }
+
+        if (offset > len || len - offset < size) {
+            return -EIO;
+        }
     }
 
     return 0;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
new file mode 100644
index 0000000000..556e1d15c4
--- /dev/null
+++ b/block/dirty-bitmap.c
@@ -0,0 +1,387 @@
+/*
+ * Block Dirty Bitmap
+ *
+ * Copyright (c) 2016 Red Hat. Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ *     or enabled. A frozen bitmap can only abdicate() or reclaim().
+ */
+struct BdrvDirtyBitmap {
+    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
+    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+    char *name;                 /* Optional non-empty unique ID */
+    int64_t size;               /* Size of the bitmap (Number of sectors) */
+    bool disabled;              /* Bitmap is read-only */
+    QLIST_ENTRY(BdrvDirtyBitmap) list;
+};
+
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+    BdrvDirtyBitmap *bm;
+
+    assert(name);
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        if (bm->name && !strcmp(name, bm->name)) {
+            return bm;
+        }
+    }
+    return NULL;
+}
+
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    g_free(bitmap->name);
+    bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
+                                          Error **errp)
+{
+    int64_t bitmap_size;
+    BdrvDirtyBitmap *bitmap;
+    uint32_t sector_granularity;
+
+    assert((granularity & (granularity - 1)) == 0);
+
+    if (name && bdrv_find_dirty_bitmap(bs, name)) {
+        error_setg(errp, "Bitmap already exists: %s", name);
+        return NULL;
+    }
+    sector_granularity = granularity >> BDRV_SECTOR_BITS;
+    assert(sector_granularity);
+    bitmap_size = bdrv_nb_sectors(bs);
+    if (bitmap_size < 0) {
+        error_setg_errno(errp, -bitmap_size, "could not get length of device");
+        errno = -bitmap_size;
+        return NULL;
+    }
+    bitmap = g_new0(BdrvDirtyBitmap, 1);
+    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+    bitmap->size = bitmap_size;
+    bitmap->name = g_strdup(name);
+    bitmap->disabled = false;
+    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+    return bitmap;
+}
+
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->successor;
+}
+
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+    return !(bitmap->disabled || bitmap->successor);
+}
+
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        return DIRTY_BITMAP_STATUS_FROZEN;
+    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+        return DIRTY_BITMAP_STATUS_DISABLED;
+    } else {
+        return DIRTY_BITMAP_STATUS_ACTIVE;
+    }
+}
+
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap, Error **errp)
+{
+    uint64_t granularity;
+    BdrvDirtyBitmap *child;
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp, "Cannot create a successor for a bitmap that is "
+                   "currently frozen");
+        return -1;
+    }
+    assert(!bitmap->successor);
+
+    /* Create an anonymous successor */
+    granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    if (!child) {
+        return -1;
+    }
+
+    /* Successor will be on or off based on our current state. */
+    child->disabled = bitmap->disabled;
+
+    /* Install the successor and freeze the parent */
+    bitmap->successor = child;
+    return 0;
+}
+
+/**
+ * For a bitmap with a successor, yield our name to the successor,
+ * delete the old bitmap, and return a handle to the new bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp)
+{
+    char *name;
+    BdrvDirtyBitmap *successor = bitmap->successor;
+
+    if (successor == NULL) {
+        error_setg(errp, "Cannot relinquish control if "
+                   "there's no successor present");
+        return NULL;
+    }
+
+    name = bitmap->name;
+    bitmap->name = NULL;
+    successor->name = name;
+    bitmap->successor = NULL;
+    bdrv_release_dirty_bitmap(bs, bitmap);
+
+    return successor;
+}
+
+/**
+ * In cases of failure where we can no longer safely delete the parent,
+ * we may wish to re-join the parent and child/successor.
+ * The merged parent will be un-frozen, but not explicitly re-enabled.
+ */
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *parent,
+                                           Error **errp)
+{
+    BdrvDirtyBitmap *successor = parent->successor;
+
+    if (!successor) {
+        error_setg(errp, "Cannot reclaim a successor when none is present");
+        return NULL;
+    }
+
+    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+        error_setg(errp, "Merging of parent and successor bitmap failed");
+        return NULL;
+    }
+    bdrv_release_dirty_bitmap(bs, successor);
+    parent->successor = NULL;
+
+    return parent;
+}
+
+/**
+ * Truncates _all_ bitmaps attached to a BDS.
+ */
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *bitmap;
+    uint64_t size = bdrv_nb_sectors(bs);
+
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        assert(!bdrv_dirty_bitmap_frozen(bitmap));
+        hbitmap_truncate(bitmap->bitmap, size);
+        bitmap->size = size;
+    }
+}
+
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+                                                  BdrvDirtyBitmap *bitmap,
+                                                  bool only_named)
+{
+    BdrvDirtyBitmap *bm, *next;
+    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+            assert(!bdrv_dirty_bitmap_frozen(bm));
+            QLIST_REMOVE(bm, list);
+            hbitmap_free(bm->bitmap);
+            g_free(bm->name);
+            g_free(bm);
+
+            if (bitmap) {
+                return;
+            }
+        }
+    }
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+}
+
+/**
+ * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
+ * There must not be any frozen bitmaps attached.
+ */
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
+{
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+}
+
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    bitmap->disabled = true;
+}
+
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    bitmap->disabled = false;
+}
+
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *bm;
+    BlockDirtyInfoList *list = NULL;
+    BlockDirtyInfoList **plist = &list;
+
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
+        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
+        info->count = bdrv_get_dirty_count(bm);
+        info->granularity = bdrv_dirty_bitmap_granularity(bm);
+        info->has_name = !!bm->name;
+        info->name = g_strdup(bm->name);
+        info->status = bdrv_dirty_bitmap_status(bm);
+        entry->value = info;
+        *plist = entry;
+        plist = &entry->next;
+    }
+
+    return list;
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                   int64_t sector)
+{
+    if (bitmap) {
+        return hbitmap_get(bitmap->bitmap, sector);
+    } else {
+        return 0;
+    }
+}
+
+/**
+ * Chooses a default granularity based on the existing cluster size,
+ * but clamped between [4K, 64K]. Defaults to 64K in the case that there
+ * is no cluster size information available.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+    BlockDriverInfo bdi;
+    uint32_t granularity;
+
+    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
+        granularity = MAX(4096, bdi.cluster_size);
+        granularity = MIN(65536, granularity);
+    } else {
+        granularity = 65536;
+    }
+
+    return granularity;
+}
+
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+}
+
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+{
+    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+}
+
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    if (!out) {
+        hbitmap_reset_all(bitmap->bitmap);
+    } else {
+        HBitmap *backup = bitmap->bitmap;
+        bitmap->bitmap = hbitmap_alloc(bitmap->size,
+                                       hbitmap_granularity(backup));
+        *out = backup;
+    }
+}
+
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+    HBitmap *tmp = bitmap->bitmap;
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    bitmap->bitmap = in;
+    hbitmap_free(tmp);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                    int nr_sectors)
+{
+    BdrvDirtyBitmap *bitmap;
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+            continue;
+        }
+        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+    }
+}
+
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    assert(hbi->hb);
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
+{
+    return hbitmap_count(bitmap->bitmap);
+}
diff --git a/block/parallels.c b/block/parallels.c
index 645521d783..0d1a60c972 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -30,6 +30,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/bitmap.h"
 #include "qapi/util.h"
@@ -461,7 +462,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
     int64_t total_size, cl_size;
     uint8_t tmp[BDRV_SECTOR_SIZE];
     Error *local_err = NULL;
-    BlockDriverState *file;
+    BlockBackend *file;
     uint32_t bat_entries, bat_sectors;
     ParallelsHeader header;
     int ret;
@@ -477,14 +478,17 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
         return ret;
     }
 
-    file = NULL;
-    ret = bdrv_open(&file, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (ret < 0) {
+    file = blk_new_open("image", filename, NULL, NULL,
+                        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                        &local_err);
+    if (file == NULL) {
         error_propagate(errp, local_err);
-        return ret;
+        return -EIO;
     }
-    ret = bdrv_truncate(file, 0);
+
+    blk_set_allow_write_beyond_eof(file, true);
+
+    ret = blk_truncate(file, 0);
     if (ret < 0) {
         goto exit;
     }
@@ -508,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
     memset(tmp, 0, sizeof(tmp));
     memcpy(tmp, &header, sizeof(header));
 
-    ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+    ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
     if (ret < 0) {
         goto exit;
     }
-    ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
+    ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
     if (ret < 0) {
         goto exit;
     }
     ret = 0;
 
 done:
-    bdrv_unref(file);
+    blk_unref(file);
     return ret;
 
 exit:
diff --git a/block/qapi.c b/block/qapi.c
index db2d3fb915..6a4869a8d9 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -355,100 +355,116 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
     qapi_free_BlockInfo(info);
 }
 
-static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
-                                    bool query_backing)
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing);
+
+static void bdrv_query_blk_stats(BlockStats *s, BlockBackend *blk)
 {
-    BlockStats *s;
+    BlockAcctStats *stats = blk_get_stats(blk);
+    BlockAcctTimedStats *ts = NULL;
 
-    s = g_malloc0(sizeof(*s));
+    s->has_device = true;
+    s->device = g_strdup(blk_name(blk));
 
-    if (bdrv_get_device_name(bs)[0]) {
-        s->has_device = true;
-        s->device = g_strdup(bdrv_get_device_name(bs));
-    }
+    s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+    s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+    s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+    s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
 
-    if (bdrv_get_node_name(bs)[0]) {
-        s->has_node_name = true;
-        s->node_name = g_strdup(bdrv_get_node_name(bs));
+    s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+    s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+    s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+    s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+    s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+    s->stats->invalid_flush_operations =
+        stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+    s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+    s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+    s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+    s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+    s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+    s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+    s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
+    if (s->stats->has_idle_time_ns) {
+        s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
     }
 
-    s->stats = g_malloc0(sizeof(*s->stats));
-    if (bs->blk) {
-        BlockAcctStats *stats = blk_get_stats(bs->blk);
-        BlockAcctTimedStats *ts = NULL;
-
-        s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
-        s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
-        s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
-        s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
-
-        s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
-        s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
-        s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
-
-        s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
-        s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
-        s->stats->invalid_flush_operations =
-            stats->invalid_ops[BLOCK_ACCT_FLUSH];
-
-        s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
-        s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
-        s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
-        s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
-        s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
-        s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
-
-        s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
-        if (s->stats->has_idle_time_ns) {
-            s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
-        }
+    s->stats->account_invalid = stats->account_invalid;
+    s->stats->account_failed = stats->account_failed;
 
-        s->stats->account_invalid = stats->account_invalid;
-        s->stats->account_failed = stats->account_failed;
+    while ((ts = block_acct_interval_next(stats, ts))) {
+        BlockDeviceTimedStatsList *timed_stats =
+            g_malloc0(sizeof(*timed_stats));
+        BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+        timed_stats->next = s->stats->timed_stats;
+        timed_stats->value = dev_stats;
+        s->stats->timed_stats = timed_stats;
 
-        while ((ts = block_acct_interval_next(stats, ts))) {
-            BlockDeviceTimedStatsList *timed_stats =
-                g_malloc0(sizeof(*timed_stats));
-            BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
-            timed_stats->next = s->stats->timed_stats;
-            timed_stats->value = dev_stats;
-            s->stats->timed_stats = timed_stats;
+        TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+        TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+        TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
 
-            TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
-            TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
-            TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+        dev_stats->interval_length = ts->interval_length;
 
-            dev_stats->interval_length = ts->interval_length;
+        dev_stats->min_rd_latency_ns = timed_average_min(rd);
+        dev_stats->max_rd_latency_ns = timed_average_max(rd);
+        dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
 
-            dev_stats->min_rd_latency_ns = timed_average_min(rd);
-            dev_stats->max_rd_latency_ns = timed_average_max(rd);
-            dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+        dev_stats->min_wr_latency_ns = timed_average_min(wr);
+        dev_stats->max_wr_latency_ns = timed_average_max(wr);
+        dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
 
-            dev_stats->min_wr_latency_ns = timed_average_min(wr);
-            dev_stats->max_wr_latency_ns = timed_average_max(wr);
-            dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+        dev_stats->min_flush_latency_ns = timed_average_min(fl);
+        dev_stats->max_flush_latency_ns = timed_average_max(fl);
+        dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
 
-            dev_stats->min_flush_latency_ns = timed_average_min(fl);
-            dev_stats->max_flush_latency_ns = timed_average_max(fl);
-            dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+        dev_stats->avg_rd_queue_depth =
+            block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+        dev_stats->avg_wr_queue_depth =
+            block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+    }
+}
 
-            dev_stats->avg_rd_queue_depth =
-                block_acct_queue_depth(ts, BLOCK_ACCT_READ);
-            dev_stats->avg_wr_queue_depth =
-                block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
-        }
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+                                 bool query_backing)
+{
+    if (bdrv_get_node_name(bs)[0]) {
+        s->has_node_name = true;
+        s->node_name = g_strdup(bdrv_get_node_name(bs));
     }
 
     s->stats->wr_highest_offset = bs->wr_highest_offset;
 
     if (bs->file) {
         s->has_parent = true;
-        s->parent = bdrv_query_stats(bs->file->bs, query_backing);
+        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
     }
 
     if (query_backing && bs->backing) {
         s->has_backing = true;
-        s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
+        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+    }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+                                    const BlockDriverState *bs,
+                                    bool query_backing)
+{
+    BlockStats *s;
+
+    s = g_malloc0(sizeof(*s));
+    s->stats = g_malloc0(sizeof(*s->stats));
+
+    if (blk) {
+        bdrv_query_blk_stats(s, blk);
+    }
+    if (bs) {
+        bdrv_query_bds_stats(s, bs, query_backing);
     }
 
     return s;
@@ -477,22 +493,38 @@ BlockInfoList *qmp_query_block(Error **errp)
     return head;
 }
 
+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+                           bool query_nodes)
+{
+    if (query_nodes) {
+        *bs = bdrv_next_node(*bs);
+        return !!*bs;
+    }
+
+    *blk = blk_next(*blk);
+    *bs = *blk ? blk_bs(*blk) : NULL;
+
+    return !!*blk;
+}
+
 BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                                      bool query_nodes,
                                      Error **errp)
 {
     BlockStatsList *head = NULL, **p_next = &head;
+    BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
 
     /* Just to be safe if query_nodes is not always initialized */
     query_nodes = has_query_nodes && query_nodes;
 
-    while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
+    while (next_query_bds(&blk, &bs, query_nodes)) {
         BlockStatsList *info = g_malloc0(sizeof(*info));
-        AioContext *ctx = bdrv_get_aio_context(bs);
+        AioContext *ctx = blk ? blk_get_aio_context(blk)
+                              : bdrv_get_aio_context(bs);
 
         aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(bs, !query_nodes);
+        info->value = bdrv_query_stats(blk, bs, !query_nodes);
         aio_context_release(ctx);
 
         *p_next = info;
diff --git a/block/qcow.c b/block/qcow.c
index 251910cc9d..2fd5ee65d4 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
@@ -780,7 +781,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     int flags = 0;
     Error *local_err = NULL;
     int ret;
-    BlockDriverState *qcow_bs;
+    BlockBackend *qcow_blk;
 
     /* Read out options */
     total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -796,15 +797,18 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
         goto cleanup;
     }
 
-    qcow_bs = NULL;
-    ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (ret < 0) {
+    qcow_blk = blk_new_open("image", filename, NULL, NULL,
+                            BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                            &local_err);
+    if (qcow_blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto cleanup;
     }
 
-    ret = bdrv_truncate(qcow_bs, 0);
+    blk_set_allow_write_beyond_eof(qcow_blk, true);
+
+    ret = blk_truncate(qcow_blk, 0);
     if (ret < 0) {
         goto exit;
     }
@@ -844,13 +848,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     /* write all the data */
-    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
     if (ret != sizeof(header)) {
         goto exit;
     }
 
     if (backing_file) {
-        ret = bdrv_pwrite(qcow_bs, sizeof(header),
+        ret = blk_pwrite(qcow_blk, sizeof(header),
             backing_file, backing_filename_len);
         if (ret != backing_filename_len) {
             goto exit;
@@ -860,7 +864,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     tmp = g_malloc0(BDRV_SECTOR_SIZE);
     for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
         BDRV_SECTOR_SIZE); i++) {
-        ret = bdrv_pwrite(qcow_bs, header_size +
+        ret = blk_pwrite(qcow_blk, header_size +
             BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
         if (ret != BDRV_SECTOR_SIZE) {
             g_free(tmp);
@@ -871,7 +875,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
     g_free(tmp);
     ret = 0;
 exit:
-    bdrv_unref(qcow_bs);
+    blk_unref(qcow_blk);
 cleanup:
     g_free(backing_file);
     return ret;
diff --git a/block/qcow2.c b/block/qcow2.c
index 8babecdab2..1ce6264011 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -24,6 +24,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include <zlib.h>
 #include "block/qcow2.h"
@@ -2097,7 +2098,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
      * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
      * size for any qcow2 image.
      */
-    BlockDriverState* bs;
+    BlockBackend *blk;
     QCowHeader *header;
     uint64_t* refcount_table;
     Error *local_err = NULL;
@@ -2172,14 +2173,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         return ret;
     }
 
-    bs = NULL;
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open("image", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
-        return ret;
+        return -EIO;
     }
 
+    blk_set_allow_write_beyond_eof(blk, true);
+
     /* Write the header */
     QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
     header = g_malloc0(cluster_size);
@@ -2207,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
             cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
     }
 
-    ret = bdrv_pwrite(bs, 0, header, cluster_size);
+    ret = blk_pwrite(blk, 0, header, cluster_size);
     g_free(header);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2220,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     /* Write a refcount table with one refcount block */
     refcount_table = g_malloc0(2 * cluster_size);
     refcount_table[0] = cpu_to_be64(2 * cluster_size);
-    ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
+    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
     g_free(refcount_table);
 
     if (ret < 0) {
@@ -2225,8 +2228,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         goto out;
     }
 
-    bdrv_unref(bs);
-    bs = NULL;
+    blk_unref(blk);
+    blk = NULL;
 
     /*
      * And now open the image and make it consistent first (i.e. increase the
@@ -2235,15 +2238,16 @@ static int qcow2_create2(const char *filename, int64_t total_size,
      */
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow2"));
-    ret = bdrv_open(&bs, filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open("image-qcow2", filename, NULL, options,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto out;
     }
 
-    ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
+    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                          "header and refcount table");
@@ -2255,14 +2259,14 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     }
 
     /* Create a full header (including things like feature table) */
-    ret = qcow2_update_header(bs);
+    ret = qcow2_update_header(blk_bs(blk));
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not update qcow2 header");
         goto out;
     }
 
     /* Okay, now that we have a valid image, let's give it the right size */
-    ret = bdrv_truncate(bs, total_size);
+    ret = blk_truncate(blk, total_size);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not resize image");
         goto out;
@@ -2270,7 +2274,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
 
     /* Want a backing file? There you go.*/
     if (backing_file) {
-        ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+        ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                              "with format '%s'", backing_file, backing_format);
@@ -2280,9 +2284,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
 
     /* And if we're supposed to preallocate metadata, do that now */
     if (prealloc != PREALLOC_MODE_OFF) {
-        BDRVQcow2State *s = bs->opaque;
+        BDRVQcow2State *s = blk_bs(blk)->opaque;
         qemu_co_mutex_lock(&s->lock);
-        ret = preallocate(bs);
+        ret = preallocate(blk_bs(blk));
         qemu_co_mutex_unlock(&s->lock);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -2290,24 +2294,25 @@ static int qcow2_create2(const char *filename, int64_t total_size,
         }
     }
 
-    bdrv_unref(bs);
-    bs = NULL;
+    blk_unref(blk);
+    blk = NULL;
 
     /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow2"));
-    ret = bdrv_open(&bs, filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
-                    &local_err);
-    if (local_err) {
+    blk = blk_new_open("image-flush", filename, NULL, options,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto out;
     }
 
     ret = 0;
 out:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
     }
     return ret;
 }
diff --git a/block/qed.c b/block/qed.c
index 404be1e9b9..8de7dd0832 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -18,6 +18,7 @@
 #include "qed.h"
 #include "qapi/qmp/qerror.h"
 #include "migration/migration.h"
+#include "sysemu/block-backend.h"
 
 static const AIOCBInfo qed_aiocb_info = {
     .aiocb_size         = sizeof(QEDAIOCB),
@@ -580,7 +581,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
     size_t l1_size = header.cluster_size * header.table_size;
     Error *local_err = NULL;
     int ret = 0;
-    BlockDriverState *bs;
+    BlockBackend *blk;
 
     ret = bdrv_create_file(filename, opts, &local_err);
     if (ret < 0) {
@@ -588,17 +589,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
         return ret;
     }
 
-    bs = NULL;
-    ret = bdrv_open(&bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open("image", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
-        return ret;
+        return -EIO;
     }
 
+    blk_set_allow_write_beyond_eof(blk, true);
+
     /* File must start empty and grow, check truncate is supported */
-    ret = bdrv_truncate(bs, 0);
+    ret = blk_truncate(blk, 0);
     if (ret < 0) {
         goto out;
     }
@@ -614,18 +616,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
     }
 
     qed_header_cpu_to_le(&header, &le_header);
-    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
     if (ret < 0) {
         goto out;
     }
-    ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
-                      header.backing_filename_size);
+    ret = blk_pwrite(blk, sizeof(le_header), backing_file,
+                     header.backing_filename_size);
     if (ret < 0) {
         goto out;
     }
 
     l1_table = g_malloc0(l1_size);
-    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
     if (ret < 0) {
         goto out;
     }
@@ -633,7 +635,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
     ret = 0; /* success */
 out:
     g_free(l1_table);
-    bdrv_unref(bs);
+    blk_unref(blk);
     return ret;
 }
 
diff --git a/block/quorum.c b/block/quorum.c
index 11cc60b713..3d473515a8 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -215,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
     return acb;
 }
 
-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+                              int nb_sectors, char *node_name, int ret)
 {
     const char *msg = NULL;
     if (ret < 0) {
         msg = strerror(-ret);
     }
-    qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
-                                      acb->sector_num, acb->nb_sectors, &error_abort);
+
+    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+                                      sector_num, nb_sectors, &error_abort);
 }
 
 static void quorum_report_failure(QuorumAIOCB *acb)
@@ -282,6 +284,7 @@ static void quorum_aio_cb(void *opaque, int ret)
     QuorumChildRequest *sacb = opaque;
     QuorumAIOCB *acb = sacb->parent;
     BDRVQuorumState *s = acb->common.bs->opaque;
+    QuorumOpType type;
     bool rewrite = false;
 
     if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
@@ -300,12 +303,14 @@ static void quorum_aio_cb(void *opaque, int ret)
         return;
     }
 
+    type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
     sacb->ret = ret;
     acb->count++;
     if (ret == 0) {
         acb->success_count++;
     } else {
-        quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
+        quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+                          sacb->aiocb->bs->node_name, ret);
     }
     assert(acb->count <= s->num_children);
     assert(acb->success_count <= s->num_children);
@@ -338,7 +343,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
             continue;
         }
         QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
+            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+                              acb->nb_sectors,
+                              s->children[item->index]->bs->node_name, 0);
         }
     }
 }
@@ -648,8 +655,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
     }
 
     for (i = 0; i < s->num_children; i++) {
-        bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
-                       acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+                                            &acb->qcrs[i].qiov, acb->nb_sectors,
+                                            quorum_aio_cb, &acb->qcrs[i]);
     }
 
     return &acb->common;
@@ -664,9 +672,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
     qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
     qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
                      acb->qcrs[acb->child_iter].buf);
-    bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
-                   &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
-                   quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+    acb->qcrs[acb->child_iter].aiocb =
+        bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+                       &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+                       quorum_aio_cb, &acb->qcrs[acb->child_iter]);
 
     return &acb->common;
 }
@@ -760,19 +769,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
     QuorumVoteValue result_value;
     int i;
     int result = 0;
+    int success_count = 0;
 
     QLIST_INIT(&error_votes.vote_list);
     error_votes.compare = quorum_64bits_compare;
 
     for (i = 0; i < s->num_children; i++) {
         result = bdrv_co_flush(s->children[i]->bs);
-        result_value.l = result;
-        quorum_count_vote(&error_votes, &result_value, i);
+        if (result) {
+            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+                              bdrv_nb_sectors(s->children[i]->bs),
+                              s->children[i]->bs->node_name, result);
+            result_value.l = result;
+            quorum_count_vote(&error_votes, &result_value, i);
+        } else {
+            success_count++;
+        }
     }
 
-    winner = quorum_get_vote_winner(&error_votes);
-    result = winner->value.l;
-
+    if (success_count >= s->threshold) {
+        result = 0;
+    } else {
+        winner = quorum_get_vote_winner(&error_votes);
+        result = winner->value.l;
+    }
     quorum_free_vote_list(&error_votes);
 
     return result;
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 05677ed983..a6e98a5a72 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -18,6 +18,7 @@
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/bitops.h"
 
 #define SD_PROTO_VER 0x01
@@ -1636,7 +1637,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
 
 static int sd_prealloc(const char *filename, Error **errp)
 {
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
     BDRVSheepdogState *base = NULL;
     unsigned long buf_size;
     uint32_t idx, max_idx;
@@ -1645,19 +1646,23 @@ static int sd_prealloc(const char *filename, Error **errp)
     void *buf = NULL;
     int ret;
 
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    errp);
-    if (ret < 0) {
+    blk = blk_new_open("image-prealloc", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       errp);
+    if (blk == NULL) {
+        ret = -EIO;
         goto out_with_err_set;
     }
 
-    vdi_size = bdrv_getlength(bs);
+    blk_set_allow_write_beyond_eof(blk, true);
+
+    vdi_size = blk_getlength(blk);
     if (vdi_size < 0) {
         ret = vdi_size;
         goto out;
     }
 
-    base = bs->opaque;
+    base = blk_bs(blk)->opaque;
     object_size = (UINT32_C(1) << base->inode.block_size_shift);
     buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
     buf = g_malloc0(buf_size);
@@ -1669,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp)
          * The created image can be a cloned image, so we need to read
          * a data from the source image.
          */
-        ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+        ret = blk_pread(blk, idx * buf_size, buf, buf_size);
         if (ret < 0) {
             goto out;
         }
-        ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+        ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
         if (ret < 0) {
             goto out;
         }
     }
 
+    ret = 0;
 out:
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Can't pre-allocate");
     }
 out_with_err_set:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
     }
     g_free(buf);
 
@@ -1825,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
     }
 
     if (backing_file) {
-        BlockDriverState *bs;
+        BlockBackend *blk;
         BDRVSheepdogState *base;
         BlockDriver *drv;
 
@@ -1837,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
             goto out;
         }
 
-        bs = NULL;
-        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp);
-        if (ret < 0) {
+        blk = blk_new_open("backing", backing_file, NULL, NULL,
+                           BDRV_O_PROTOCOL | BDRV_O_CACHE_WB, errp);
+        if (blk == NULL) {
+            ret = -EIO;
             goto out;
         }
 
-        base = bs->opaque;
+        base = blk_bs(blk)->opaque;
 
         if (!is_snapshot(&base->inode)) {
             error_setg(errp, "cannot clone from a non snapshot vdi");
-            bdrv_unref(bs);
+            blk_unref(blk);
             ret = -EINVAL;
             goto out;
         }
         s->inode.vdi_id = base->inode.vdi_id;
-        bdrv_unref(bs);
+        blk_unref(blk);
     }
 
     s->aio_context = qemu_get_aio_context();
diff --git a/block/vdi.c b/block/vdi.c
index b403243604..662d14b74e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -52,6 +52,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
 #include "qemu/coroutine.h"
@@ -733,7 +734,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     size_t bmap_size;
     int64_t offset = 0;
     Error *local_err = NULL;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
     uint32_t *bmap = NULL;
 
     logout("\n");
@@ -766,13 +767,18 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
         error_propagate(errp, local_err);
         goto exit;
     }
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+
+    blk = blk_new_open("image", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto exit;
     }
 
+    blk_set_allow_write_beyond_eof(blk, true);
+
     /* We need enough blocks to store the given disk size,
        so always round up. */
     blocks = DIV_ROUND_UP(bytes, block_size);
@@ -802,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     vdi_header_print(&header);
 #endif
     vdi_header_to_le(&header);
-    ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+    ret = blk_pwrite(blk, offset, &header, sizeof(header));
     if (ret < 0) {
         error_setg(errp, "Error writing header to %s", filename);
         goto exit;
@@ -823,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
                 bmap[i] = VDI_UNALLOCATED;
             }
         }
-        ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+        ret = blk_pwrite(blk, offset, bmap, bmap_size);
         if (ret < 0) {
             error_setg(errp, "Error writing bmap to %s", filename);
             goto exit;
@@ -832,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     if (image_type == VDI_TYPE_STATIC) {
-        ret = bdrv_truncate(bs, offset + blocks * block_size);
+        ret = blk_truncate(blk, offset + blocks * block_size);
         if (ret < 0) {
             error_setg(errp, "Failed to statically allocate %s", filename);
             goto exit;
@@ -840,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
 exit:
-    bdrv_unref(bs);
+    blk_unref(blk);
     g_free(bmap);
     return ret;
 }
diff --git a/block/vhdx.c b/block/vhdx.c
index 9a51428317..e15020c9be 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -18,6 +18,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/crc32c.h"
 #include "block/vhdx.h"
@@ -1772,7 +1773,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
 
     gunichar2 *creator = NULL;
     glong creator_items;
-    BlockDriverState *bs;
+    BlockBackend *blk;
     char *type = NULL;
     VHDXImageType image_type;
     Error *local_err = NULL;
@@ -1837,14 +1838,17 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
         goto exit;
     }
 
-    bs = NULL;
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open("image", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto exit;
     }
 
+    blk_set_allow_write_beyond_eof(blk, true);
+
     /* Create (A) */
 
     /* The creator field is optional, but may be useful for
@@ -1852,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
     creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
                               &creator_items, NULL);
     signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
     if (ret < 0) {
         goto delete_and_exit;
     }
     if (creator) {
-        ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
-                          creator, creator_items * sizeof(gunichar2));
+        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+                         creator, creator_items * sizeof(gunichar2));
         if (ret < 0) {
             goto delete_and_exit;
         }
@@ -1866,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
 
 
     /* Creates (B),(C) */
-    ret = vhdx_create_new_headers(bs, image_size, log_size);
+    ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
     if (ret < 0) {
         goto delete_and_exit;
     }
 
     /* Creates (D),(E),(G) explicitly. (F) created as by-product */
-    ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+    ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
                                        log_size, use_zero_blocks, image_type,
                                        &metadata_offset);
     if (ret < 0) {
@@ -1880,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
     }
 
     /* Creates (H) */
-    ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+    ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
                                    metadata_offset, image_type);
     if (ret < 0) {
         goto delete_and_exit;
@@ -1888,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
 
 
 delete_and_exit:
-    bdrv_unref(bs);
+    blk_unref(blk);
 exit:
     g_free(type);
     g_free(creator);
diff --git a/block/vmdk.c b/block/vmdk.c
index a8db5d9ec2..23bd57e20e 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
@@ -242,15 +243,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
 
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
-    char desc[DESC_SIZE];
+    char *desc;
     uint32_t cid = 0xffffffff;
     const char *p_name, *cid_str;
     size_t cid_str_size;
     BDRVVmdkState *s = bs->opaque;
     int ret;
 
+    desc = g_malloc0(DESC_SIZE);
     ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
+        g_free(desc);
         return 0;
     }
 
@@ -269,41 +272,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
         sscanf(p_name, "%" SCNx32, &cid);
     }
 
+    g_free(desc);
     return cid;
 }
 
 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 {
-    char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
+    char *desc, *tmp_desc;
     char *p_name, *tmp_str;
     BDRVVmdkState *s = bs->opaque;
-    int ret;
+    int ret = 0;
 
+    desc = g_malloc0(DESC_SIZE);
+    tmp_desc = g_malloc0(DESC_SIZE);
     ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
 
     desc[DESC_SIZE - 1] = '\0';
     tmp_str = strstr(desc, "parentCID");
     if (tmp_str == NULL) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
 
-    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+    pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
     p_name = strstr(desc, "CID");
     if (p_name != NULL) {
         p_name += sizeof("CID");
-        snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
-        pstrcat(desc, sizeof(desc), tmp_desc);
+        snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+        pstrcat(desc, DESC_SIZE, tmp_desc);
     }
 
     ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
-    if (ret < 0) {
-        return ret;
-    }
 
-    return 0;
+out:
+    g_free(desc);
+    g_free(tmp_desc);
+    return ret;
 }
 
 static int vmdk_is_cid_valid(BlockDriverState *bs)
@@ -337,15 +344,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state,
 static int vmdk_parent_open(BlockDriverState *bs)
 {
     char *p_name;
-    char desc[DESC_SIZE + 1];
+    char *desc;
     BDRVVmdkState *s = bs->opaque;
     int ret;
 
-    desc[DESC_SIZE] = '\0';
+    desc = g_malloc0(DESC_SIZE + 1);
     ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
     if (ret < 0) {
-        return ret;
+        goto out;
     }
+    ret = 0;
 
     p_name = strstr(desc, "parentFileNameHint");
     if (p_name != NULL) {
@@ -354,16 +362,20 @@ static int vmdk_parent_open(BlockDriverState *bs)
         p_name += sizeof("parentFileNameHint") + 1;
         end_name = strchr(p_name, '\"');
         if (end_name == NULL) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
         }
         if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
         }
 
         pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
     }
 
-    return 0;
+out:
+    g_free(desc);
+    return ret;
 }
 
 /* Create and append extent to the extent array. Return the added VmdkExtent
@@ -1639,7 +1651,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
                               QemuOpts *opts, Error **errp)
 {
     int ret, i;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
     VMDK4Header header;
     Error *local_err = NULL;
     uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1652,16 +1664,19 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
         goto exit;
     }
 
-    assert(bs == NULL);
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+    blk = blk_new_open("extent", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto exit;
     }
 
+    blk_set_allow_write_beyond_eof(blk, true);
+
     if (flat) {
-        ret = bdrv_truncate(bs, filesize);
+        ret = blk_truncate(blk, filesize);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not truncate file");
         }
@@ -1716,18 +1731,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     header.check_bytes[3] = 0xa;
 
     /* write all the data */
-    ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+    ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
     }
-    ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+    ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
     }
 
-    ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not truncate file");
         goto exit;
@@ -1740,8 +1755,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
          i < gt_count; i++, tmp += gt_size) {
         gd_buf[i] = cpu_to_le32(tmp);
     }
-    ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
-                      gd_buf, gd_buf_size);
+    ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+                     gd_buf, gd_buf_size);
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
@@ -1752,8 +1767,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
          i < gt_count; i++, tmp += gt_size) {
         gd_buf[i] = cpu_to_le32(tmp);
     }
-    ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
-                      gd_buf, gd_buf_size);
+    ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+                     gd_buf, gd_buf_size);
     if (ret < 0) {
         error_setg(errp, QERR_IO_ERROR);
         goto exit;
@@ -1761,8 +1776,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
 
     ret = 0;
 exit:
-    if (bs) {
-        bdrv_unref(bs);
+    if (blk) {
+        blk_unref(blk);
     }
     g_free(gd_buf);
     return ret;
@@ -1811,7 +1826,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
 {
     int idx = 0;
-    BlockDriverState *new_bs = NULL;
+    BlockBackend *new_blk = NULL;
     Error *local_err = NULL;
     char *desc = NULL;
     int64_t total_size = 0, filesize;
@@ -1922,7 +1937,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
         goto exit;
     }
     if (backing_file) {
-        BlockDriverState *bs = NULL;
+        BlockBackend *blk;
         char *full_backing = g_new0(char, PATH_MAX);
         bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                      full_backing, PATH_MAX,
@@ -1933,18 +1948,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
             ret = -ENOENT;
             goto exit;
         }
-        ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp);
+
+        blk = blk_new_open("backing", full_backing, NULL, NULL,
+                           BDRV_O_NO_BACKING | BDRV_O_CACHE_WB, errp);
         g_free(full_backing);
-        if (ret != 0) {
+        if (blk == NULL) {
+            ret = -EIO;
             goto exit;
         }
-        if (strcmp(bs->drv->format_name, "vmdk")) {
-            bdrv_unref(bs);
+        if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
+            blk_unref(blk);
             ret = -EINVAL;
             goto exit;
         }
-        parent_cid = vmdk_read_cid(bs, 0);
-        bdrv_unref(bs);
+        parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+        blk_unref(blk);
         snprintf(parent_desc_line, BUF_SIZE,
                 "parentFileNameHint=\"%s\"", backing_file);
     }
@@ -2002,14 +2020,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
             goto exit;
         }
     }
-    assert(new_bs == NULL);
-    ret = bdrv_open(&new_bs, filename, NULL, NULL,
-                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (ret < 0) {
+
+    new_blk = blk_new_open("descriptor", filename, NULL, NULL,
+                           BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                           &local_err);
+    if (new_blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto exit;
     }
-    ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+
+    blk_set_allow_write_beyond_eof(new_blk, true);
+
+    ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write description");
         goto exit;
@@ -2017,14 +2040,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
     /* bdrv_pwrite write padding zeros to align to sector, we don't need that
      * for description file */
     if (desc_offset == 0) {
-        ret = bdrv_truncate(new_bs, desc_len);
+        ret = blk_truncate(new_blk, desc_len);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not truncate file");
         }
     }
 exit:
-    if (new_bs) {
-        bdrv_unref(new_bs);
+    if (new_blk) {
+        blk_unref(new_blk);
     }
     g_free(adapter_type);
     g_free(backing_file);
diff --git a/block/vpc.c b/block/vpc.c
index f504536d1c..0d1524d6f6 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
 #if defined(CONFIG_UUID)
@@ -46,8 +47,14 @@ enum vhd_type {
 // Seconds since Jan 1, 2000 0:00:00 (UTC)
 #define VHD_TIMESTAMP_BASE 946684800
 
+#define VHD_CHS_MAX_C   65535LL
+#define VHD_CHS_MAX_H   16
+#define VHD_CHS_MAX_S   255
+
 #define VHD_MAX_SECTORS       (65535LL * 255 * 255)
-#define VHD_MAX_GEOMETRY      (65535LL *  16 * 255)
+#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
+
+#define VPC_OPT_FORCE_SIZE "force_size"
 
 // always big-endian
 typedef struct vhd_footer {
@@ -128,6 +135,8 @@ typedef struct BDRVVPCState {
 
     uint32_t block_size;
     uint32_t bitmap_size;
+    bool force_use_chs;
+    bool force_use_sz;
 
 #ifdef CACHE
     uint8_t *pageentry_u8;
@@ -140,6 +149,22 @@ typedef struct BDRVVPCState {
     Error *migration_blocker;
 } BDRVVPCState;
 
+#define VPC_OPT_SIZE_CALC "force_size_calc"
+static QemuOptsList vpc_runtime_opts = {
+    .name = "vpc-runtime-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
+    .desc = {
+        {
+            .name = VPC_OPT_SIZE_CALC,
+            .type = QEMU_OPT_STRING,
+            .help = "Force disk size calculation to use either CHS geometry, "
+                    "or use the disk current_size specified in the VHD footer. "
+                    "{chs, current_size}"
+        },
+        { /* end of list */ }
+    }
+};
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
     uint32_t res = 0;
@@ -159,6 +184,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
     return 0;
 }
 
+static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
+                              Error **errp)
+{
+    BDRVVPCState *s = bs->opaque;
+    const char *size_calc;
+
+    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
+
+    if (!size_calc) {
+       /* no override, use autodetect only */
+    } else if (!strcmp(size_calc, "current_size")) {
+        s->force_use_sz = true;
+    } else if (!strcmp(size_calc, "chs")) {
+        s->force_use_chs = true;
+    } else {
+        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
+    }
+}
+
 static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -166,6 +210,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int i;
     VHDFooter *footer;
     VHDDynDiskHeader *dyndisk_header;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
+    bool use_chs;
     uint8_t buf[HEADER_SIZE];
     uint32_t checksum;
     uint64_t computed_size;
@@ -173,6 +220,21 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int disk_type = VHD_DYNAMIC;
     int ret;
 
+    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    vpc_parse_options(bs, opts, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
     ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
         goto fail;
@@ -218,12 +280,36 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     bs->total_sectors = (int64_t)
         be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 
-    /* Images that have exactly the maximum geometry are probably bigger and
-     * would be truncated if we adhered to the geometry for them. Rely on
-     * footer->current_size for them. */
-    if (bs->total_sectors == VHD_MAX_GEOMETRY) {
+    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
+     * VHD image sizes differently.  VPC will rely on CHS geometry,
+     * while Hyper-V and disk2vhd use the size specified in the footer.
+     *
+     * We use a couple of approaches to try and determine the correct method:
+     * look at the Creator App field, and look for images that have CHS
+     * geometry that is the maximum value.
+     *
+     * If the CHS geometry is the maximum CHS geometry, then we assume that
+     * the size is the footer->current_size to avoid truncation.  Otherwise,
+     * we follow the table based on footer->creator_app:
+     *
+     *  Known creator apps:
+     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
+     *      'qemu'  :  CHS              QEMU (uses disk geometry)
+     *      'qem2'  :  current_size     QEMU (uses current_size)
+     *      'win '  :  current_size     Hyper-V
+     *      'd2v '  :  current_size     Disk2vhd
+     *
+     *  The user can override the table values via drive options, however
+     *  even with an override we will still use current_size for images
+     *  that have CHS geometry of the maximum size.
+     */
+    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
+               !!strncmp(footer->creator_app, "qem2", 4) &&
+               !!strncmp(footer->creator_app, "d2v ", 4)) || s->force_use_chs;
+
+    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
         bs->total_sectors = be64_to_cpu(footer->current_size) /
-                            BDRV_SECTOR_SIZE;
+                                        BDRV_SECTOR_SIZE;
     }
 
     /* Allow a maximum disk size of approximately 2 TB */
@@ -673,7 +759,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
     return 0;
 }
 
-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
                                int64_t total_sectors)
 {
     VHDDynDiskHeader *dyndisk_header =
@@ -687,13 +773,13 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
     block_size = 0x200000;
     num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
 
-    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
     if (ret) {
         goto fail;
     }
 
     offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
-    ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
     if (ret < 0) {
         goto fail;
     }
@@ -703,7 +789,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
 
     memset(buf, 0xFF, 512);
     for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
-        ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+        ret = blk_pwrite(blk, offset, buf, 512);
         if (ret < 0) {
             goto fail;
         }
@@ -730,7 +816,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
     // Write the header
     offset = 512;
 
-    ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+    ret = blk_pwrite(blk, offset, buf, 1024);
     if (ret < 0) {
         goto fail;
     }
@@ -739,7 +825,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
     return ret;
 }
 
-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
                              int64_t total_size)
 {
     int ret;
@@ -747,12 +833,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
     /* Add footer to total size */
     total_size += HEADER_SIZE;
 
-    ret = bdrv_truncate(bs, total_size);
+    ret = blk_truncate(blk, total_size);
     if (ret < 0) {
         return ret;
     }
 
-    ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+    ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
     if (ret < 0) {
         return ret;
     }
@@ -773,8 +859,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     int64_t total_size;
     int disk_type;
     int ret = -EIO;
+    bool force_size;
     Error *local_err = NULL;
-    BlockDriverState *bs = NULL;
+    BlockBackend *blk = NULL;
 
     /* Read out options */
     total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -793,30 +880,44 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
         disk_type = VHD_DYNAMIC;
     }
 
+    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
+
     ret = bdrv_create_file(filename, opts, &local_err);
     if (ret < 0) {
         error_propagate(errp, local_err);
         goto out;
     }
-    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
-                    &local_err);
-    if (ret < 0) {
+
+    blk = blk_new_open("image", filename, NULL, NULL,
+                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
+                       &local_err);
+    if (blk == NULL) {
         error_propagate(errp, local_err);
+        ret = -EIO;
         goto out;
     }
 
+    blk_set_allow_write_beyond_eof(blk, true);
+
     /*
      * Calculate matching total_size and geometry. Increase the number of
      * sectors requested until we get enough (or fail). This ensures that
      * qemu-img convert doesn't truncate images, but rather rounds up.
      *
-     * If the image size can't be represented by a spec conform CHS geometry,
+     * If the image size can't be represented by a spec conformant CHS geometry,
      * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
      * the image size from the VHD footer to calculate total_sectors.
      */
-    total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
-    for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
-        calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+    if (force_size) {
+        /* This will force the use of total_size for sector count, below */
+        cyls         = VHD_CHS_MAX_C;
+        heads        = VHD_CHS_MAX_H;
+        secs_per_cyl = VHD_CHS_MAX_S;
+    } else {
+        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+        }
     }
 
     if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
@@ -835,8 +936,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     memset(buf, 0, 1024);
 
     memcpy(footer->creator, "conectix", 8);
-    /* TODO Check if "qemu" creator_app is ok for VPC */
-    memcpy(footer->creator_app, "qemu", 4);
+    if (force_size) {
+        memcpy(footer->creator_app, "qem2", 4);
+    } else {
+        memcpy(footer->creator_app, "qemu", 4);
+    }
     memcpy(footer->creator_os, "Wi2k", 4);
 
     footer->features = cpu_to_be32(0x02);
@@ -866,13 +970,13 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
     footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
 
     if (disk_type == VHD_DYNAMIC) {
-        ret = create_dynamic_disk(bs, buf, total_sectors);
+        ret = create_dynamic_disk(blk, buf, total_sectors);
     } else {
-        ret = create_fixed_disk(bs, buf, total_size);
+        ret = create_fixed_disk(blk, buf, total_size);
     }
 
 out:
-    bdrv_unref(bs);
+    blk_unref(blk);
     g_free(disk_type_param);
     return ret;
 }
@@ -917,6 +1021,13 @@ static QemuOptsList vpc_create_opts = {
                 "Type of virtual hard disk format. Supported formats are "
                 "{dynamic (default) | fixed} "
         },
+        {
+            .name = VPC_OPT_FORCE_SIZE,
+            .type = QEMU_OPT_BOOL,
+            .help = "Force disk size calculation to use the actual size "
+                    "specified, rather than using the nearest CHS-based "
+                    "calculation"
+        },
         { /* end of list */ }
     }
 };
diff --git a/blockdev.c b/blockdev.c
index 0f20c6511f..322ca03908 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -593,13 +593,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
         qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
         qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
 
-        if (snapshot) {
-            /* always use cache=unsafe with snapshot */
-            qdict_put(bs_opts, BDRV_OPT_CACHE_WB, qstring_from_str("on"));
-            qdict_put(bs_opts, BDRV_OPT_CACHE_DIRECT, qstring_from_str("off"));
-            qdict_put(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, qstring_from_str("on"));
-        }
-
         if (runstate_check(RUN_STATE_INMIGRATE)) {
             bdrv_flags |= BDRV_O_INACTIVE;
         }
@@ -682,6 +675,13 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
         goto fail;
     }
 
+    /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
+     * with other callers) rather than what we want as the real defaults.
+     * Apply the defaults here instead. */
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_WB, "on");
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         bdrv_flags |= BDRV_O_INACTIVE;
     }
@@ -1732,10 +1732,15 @@ static void external_snapshot_prepare(BlkActionState *common,
         /* create new image w/backing file */
         mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
         if (mode != NEW_IMAGE_MODE_EXISTING) {
+            int64_t size = bdrv_getlength(state->old_bs);
+            if (size < 0) {
+                error_setg_errno(errp, -size, "bdrv_getlength failed");
+                return;
+            }
             bdrv_img_create(new_image_file, format,
                             state->old_bs->filename,
                             state->old_bs->drv->format_name,
-                            NULL, -1, flags, &local_err, false);
+                            NULL, size, flags, &local_err, false);
             if (local_err) {
                 error_propagate(errp, local_err);
                 return;
@@ -2819,6 +2824,15 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
     AioContext *aio_context;
     Error *local_err = NULL;
 
+    bs = bdrv_find_node(id);
+    if (bs) {
+        qmp_x_blockdev_del(false, NULL, true, id, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+        }
+        return;
+    }
+
     blk = blk_by_name(id);
     if (!blk) {
         error_report("Device '%s' not found", id);
@@ -3870,6 +3884,36 @@ out:
     aio_context_release(aio_context);
 }
 
+void hmp_drive_add_node(Monitor *mon, const char *optstr)
+{
+    QemuOpts *opts;
+    QDict *qdict;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
+    if (!opts) {
+        return;
+    }
+
+    qdict = qemu_opts_to_qdict(opts, NULL);
+
+    if (!qdict_get_try_str(qdict, "node-name")) {
+        error_report("'node-name' needs to be specified");
+        goto out;
+    }
+
+    BlockDriverState *bs = bds_tree_init(qdict, &local_err);
+    if (!bs) {
+        error_report_err(local_err);
+        goto out;
+    }
+
+    QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
+
+out:
+    qemu_opts_del(opts);
+}
+
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
     QmpOutputVisitor *ov = qmp_output_visitor_new();
diff --git a/device-hotplug.c b/device-hotplug.c
index 9a7cd669d5..3e5cdaad10 100644
--- a/device-hotplug.c
+++ b/device-hotplug.c
@@ -30,6 +30,7 @@
 #include "qemu/config-file.h"
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
+#include "block/block_int.h"
 
 static DriveInfo *add_init_drive(const char *optstr)
 {
@@ -55,6 +56,12 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict)
 {
     DriveInfo *dinfo = NULL;
     const char *opts = qdict_get_str(qdict, "opts");
+    bool node = qdict_get_try_bool(qdict, "node", false);
+
+    if (node) {
+        hmp_drive_add_node(mon, opts);
+        return;
+    }
 
     dinfo = add_init_drive(opts);
     if (!dinfo) {
diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt
index 4e3eb9e77a..fa7574d671 100644
--- a/docs/qmp-events.txt
+++ b/docs/qmp-events.txt
@@ -325,6 +325,7 @@ Emitted to report a corruption of a Quorum file.
 
 Data:
 
+- "type":          Quorum operation type
 - "error":         Error message (json-string, optional)
                    Only present on failure.  This field contains a human-readable
                    error message.  There are no semantics other than that the
@@ -336,10 +337,18 @@ Data:
 
 Example:
 
+Read operation:
 { "event": "QUORUM_REPORT_BAD",
-     "data": { "node-name": "1.raw", "sector-num": 345435, "sectors-count": 5 },
+     "data": { "node-name": "node0", "sector-num": 345435, "sectors-count": 5,
+               "type": "read" },
      "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
 
+Flush operation:
+{ "event": "QUORUM_REPORT_BAD",
+     "data": { "node-name": "node0", "sector-num": 0, "sectors-count": 2097120,
+               "type": "flush", "error": "Broken pipe" },
+     "timestamp": { "seconds": 1456406829, "microseconds": 291763 } }
+
 Note: this event is rate-limited.
 
 RESET
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 639205b692..4f4f60a0df 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1201,8 +1201,8 @@ ETEXI
 
     {
         .name       = "drive_add",
-        .args_type  = "pci_addr:s,opts:s",
-        .params     = "[[<domain>:]<bus>:]<slot>\n"
+        .args_type  = "node:-n,pci_addr:s,opts:s",
+        .params     = "[-n] [[<domain>:]<bus>:]<slot>\n"
                       "[file=file][,if=type][,bus=n]\n"
                       "[,unit=m][,media=d][,index=i]\n"
                       "[,cyls=c,heads=h,secs=s[,trans=t]]\n"
diff --git a/include/block/block.h b/include/block/block.h
index 1c4f4d8141..eaa64262d9 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -6,8 +6,10 @@
 #include "qemu/option.h"
 #include "qemu/coroutine.h"
 #include "block/accounting.h"
+#include "block/dirty-bitmap.h"
 #include "qapi/qmp/qobject.h"
 #include "qapi-types.h"
+#include "qemu/hbitmap.h"
 
 /* block.c */
 typedef struct BlockDriver BlockDriver;
@@ -215,7 +217,6 @@ BdrvChild *bdrv_open_child(const char *filename,
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                            const char *bdref_key, Error **errp);
-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
 int bdrv_open(BlockDriverState **pbs, const char *filename,
               const char *reference, QDict *options, int flags, Error **errp);
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
@@ -320,8 +321,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
                                         const char *node_name, Error **errp);
 
 /* async block I/O */
-typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
-                                     int sector_num);
 BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                            QEMUIOVector *iov, int nb_sectors,
                            BlockCompletionFunc *cb, void *opaque);
@@ -475,42 +474,6 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
 void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
-struct HBitmapIter;
-typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
-                                          uint32_t granularity,
-                                          const char *name,
-                                          Error **errp);
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
-                                       BdrvDirtyBitmap *bitmap,
-                                       Error **errp);
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
-                                            BdrvDirtyBitmap *bitmap,
-                                            Error **errp);
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
-                                           BdrvDirtyBitmap *bitmap,
-                                           Error **errp);
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
-                                        const char *name);
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
-void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
-
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
 void bdrv_disable_copy_on_read(BlockDriverState *bs);
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ef823a660..dda5ba0927 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -694,6 +694,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   BlockCompletionFunc *cb, void *opaque,
                   BlockJobTxn *txn, Error **errp);
 
+void hmp_drive_add_node(Monitor *mon, const char *optstr);
+
 void blk_set_bs(BlockBackend *blk, BlockDriverState *bs);
 
 void blk_dev_change_media_cb(BlockBackend *blk, bool load);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
new file mode 100644
index 0000000000..80afe603f6
--- /dev/null
+++ b/include/block/dirty-bitmap.h
@@ -0,0 +1,44 @@
+#ifndef BLOCK_DIRTY_BITMAP_H
+#define BLOCK_DIRTY_BITMAP_H
+
+#include "qemu-common.h"
+#include "qemu/hbitmap.h"
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
+                                          Error **errp);
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap,
+                                       Error **errp);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp);
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *bitmap,
+                                           Error **errp);
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
+                                        const char *name);
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                   int64_t sector);
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors);
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+
+#endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 9a5ead69a1..fd039e0e81 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -10,6 +10,7 @@ typedef struct AddressSpace AddressSpace;
 typedef struct AioContext AioContext;
 typedef struct AllwinnerAHCIState AllwinnerAHCIState;
 typedef struct AudioState AudioState;
+typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
 typedef struct BlockBackend BlockBackend;
 typedef struct BlockBackendRootState BlockBackendRootState;
 typedef struct BlockDriverState BlockDriverState;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 66c5cf22e1..00d69baa07 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -78,6 +78,7 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
 
 void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk);
 
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
 void blk_iostatus_enable(BlockBackend *blk);
 bool blk_iostatus_is_enabled(const BlockBackend *blk);
 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
diff --git a/monitor.c b/monitor.c
index e99ca8c91e..894f862dd3 100644
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,7 @@
 #include "qapi-event.h"
 #include "qmp-introspect.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/qtest.h"
 
 /* for hmp_info_irq/pic */
 #if defined(TARGET_SPARC)
@@ -232,6 +233,8 @@ static const mon_cmd_t qmp_cmds[];
 
 Monitor *cur_mon;
 
+static QEMUClockType event_clock_type = QEMU_CLOCK_REALTIME;
+
 static void monitor_command_cb(void *opaque, const char *cmdline,
                                void *readline_opaque);
 
@@ -513,7 +516,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp)
              * monitor_qapi_event_handler() in evconf->rate ns.  Any
              * events arriving before then will be delayed until then.
              */
-            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+            int64_t now = qemu_clock_get_ns(event_clock_type);
 
             monitor_qapi_event_emit(event, qdict);
 
@@ -522,7 +525,7 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *qdict, Error **errp)
             evstate->data = data;
             QINCREF(evstate->data);
             evstate->qdict = NULL;
-            evstate->timer = timer_new_ns(QEMU_CLOCK_REALTIME,
+            evstate->timer = timer_new_ns(event_clock_type,
                                           monitor_qapi_event_handler,
                                           evstate);
             g_hash_table_add(monitor_qapi_event_state, evstate);
@@ -547,7 +550,7 @@ static void monitor_qapi_event_handler(void *opaque)
     qemu_mutex_lock(&monitor_lock);
 
     if (evstate->qdict) {
-        int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        int64_t now = qemu_clock_get_ns(event_clock_type);
 
         monitor_qapi_event_emit(evstate->event, evstate->qdict);
         QDECREF(evstate->qdict);
@@ -572,6 +575,10 @@ static unsigned int qapi_event_throttle_hash(const void *key)
         hash += g_str_hash(qdict_get_str(evstate->data, "id"));
     }
 
+    if (evstate->event == QAPI_EVENT_QUORUM_REPORT_BAD) {
+        hash += g_str_hash(qdict_get_str(evstate->data, "node-name"));
+    }
+
     return hash;
 }
 
@@ -589,11 +596,20 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b)
                        qdict_get_str(evb->data, "id"));
     }
 
+    if (eva->event == QAPI_EVENT_QUORUM_REPORT_BAD) {
+        return !strcmp(qdict_get_str(eva->data, "node-name"),
+                       qdict_get_str(evb->data, "node-name"));
+    }
+
     return TRUE;
 }
 
 static void monitor_qapi_event_init(void)
 {
+    if (qtest_enabled()) {
+        event_clock_type = QEMU_CLOCK_VIRTUAL;
+    }
+
     monitor_qapi_event_state = g_hash_table_new(qapi_event_throttle_hash,
                                                 qapi_event_throttle_equal);
     qmp_event_set_func_emit(monitor_qapi_event_queue);
diff --git a/qapi/block.json b/qapi/block.json
index 58e6b301bf..937337dce5 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -196,3 +196,19 @@
 ##
 { 'event': 'DEVICE_TRAY_MOVED',
   'data': { 'device': 'str', 'tray-open': 'bool' } }
+
+##
+# @QuorumOpType
+#
+# An enumeration of the quorum operation types
+#
+# @read: read operation
+#
+# @write: write operation
+#
+# @flush: flush operation
+#
+# Since: 2.6
+##
+{ 'enum': 'QuorumOpType',
+  'data': [ 'read', 'write', 'flush' ] }
diff --git a/qapi/event.json b/qapi/event.json
index 1a45a6cb26..8642052ebc 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -325,6 +325,8 @@
 #
 # Emitted to report a corruption of a Quorum file
 #
+# @type: quorum operation type (Since 2.6)
+#
 # @error: #optional, error message. Only present on failure. This field
 #         contains a human-readable error message. There are no semantics other
 #         than that the block layer reported an error and clients should not
@@ -339,7 +341,7 @@
 # Since: 2.0
 ##
 { 'event': 'QUORUM_REPORT_BAD',
-  'data': { '*error': 'str', 'node-name': 'str',
+  'data': { 'type': 'QuorumOpType', '*error': 'str', 'node-name': 'str',
             'sector-num': 'int', 'sectors-count': 'int' } }
 
 ##
diff --git a/qemu-img.c b/qemu-img.c
index 2edb139073..3103150717 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -2775,6 +2775,8 @@ static int img_snapshot(int argc, char **argv)
 static int img_rebase(int argc, char **argv)
 {
     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
+    uint8_t *buf_old = NULL;
+    uint8_t *buf_new = NULL;
     BlockDriverState *bs = NULL;
     char *filename;
     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
@@ -2957,8 +2959,6 @@ static int img_rebase(int argc, char **argv)
         int64_t new_backing_num_sectors = 0;
         uint64_t sector;
         int n;
-        uint8_t * buf_old;
-        uint8_t * buf_new;
         float local_progress = 0;
 
         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
@@ -3070,9 +3070,6 @@ static int img_rebase(int argc, char **argv)
             }
             qemu_progress_print(local_progress, 100);
         }
-
-        qemu_vfree(buf_old);
-        qemu_vfree(buf_new);
     }
 
     /*
@@ -3108,6 +3105,8 @@ out:
         blk_unref(blk_old_backing);
         blk_unref(blk_new_backing);
     }
+    qemu_vfree(buf_old);
+    qemu_vfree(buf_new);
 
     blk_unref(blk);
     if (ret) {
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 0f38d1eae3..3ea6b294a4 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2132,6 +2132,10 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
 
     /* Special cases not set in the X86CPUDefinition structs: */
     if (kvm_enabled()) {
+        if (!kvm_irqchip_in_kernel()) {
+            x86_cpu_change_kvm_default("x2apic", "off");
+        }
+
         x86_cpu_apply_props(cpu, kvm_default_props);
     }
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 7974acb399..08d6444741 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -639,6 +639,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
         if (cpu->hyperv_crash && has_msr_hv_crash) {
             c->edx |= HV_X64_GUEST_CRASH_MSR_AVAILABLE;
         }
+        c->edx |= HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
         if (cpu->hyperv_reset && has_msr_hv_reset) {
             c->eax |= HV_X64_MSR_RESET_AVAILABLE;
         }
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 53dee79afd..dd8d5cc360 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -57,11 +57,17 @@
 #endif
 
 /* For a switch indexed by MODRM, match all memory operands for a given OP.  */
-#define CASE_MEM_OP(OP) \
+#define CASE_MODRM_MEM_OP(OP) \
     case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
     case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
     case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
 
+#define CASE_MODRM_OP(OP) \
+    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
+    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
+    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
+    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
+
 //#define MACRO_TEST   1
 
 /* global register indexes */
@@ -93,6 +99,7 @@ typedef struct DisasContext {
     int prefix;
     TCGMemOp aflag;
     TCGMemOp dflag;
+    target_ulong pc_start;
     target_ulong pc; /* pc = eip + cs_base */
     int is_jmp; /* 1 = means jump (stop translation), 2 means CPU
                    static state change (stop translation) */
@@ -460,15 +467,15 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
         break;
     case MO_16:
         /* 16 bit address */
-        if (ovr_seg < 0) {
-            ovr_seg = def_seg;
-        }
         tcg_gen_ext16u_tl(cpu_A0, a0);
-        /* ADDSEG will only be false in 16-bit mode for LEA.  */
-        if (!s->addseg) {
-            return;
-        }
         a0 = cpu_A0;
+        if (ovr_seg < 0) {
+            if (s->addseg) {
+                ovr_seg = def_seg;
+            } else {
+                return;
+            }
+        }
         break;
     default:
         tcg_abort();
@@ -2362,6 +2369,30 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
     s->is_jmp = DISAS_TB_JUMP;
 }
 
+/* Generate #UD for the current instruction.  The assumption here is that
+   the instruction is known, but it isn't allowed in the current cpu mode.  */
+static void gen_illegal_opcode(DisasContext *s)
+{
+    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
+}
+
+/* Similarly, except that the assumption here is that we don't decode
+   the instruction at all -- either a missing opcode, an unimplemented
+   feature, or just a bogus instruction stream.  */
+static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
+{
+    gen_illegal_opcode(s);
+
+    if (qemu_loglevel_mask(LOG_UNIMP)) {
+        target_ulong pc = s->pc_start, end = s->pc;
+        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
+        for (; pc < end; ++pc) {
+            qemu_log(" %02x", cpu_ldub_code(env, pc));
+        }
+        qemu_log("\n");
+    }
+}
+
 /* an interrupt is different from an exception because of the
    privilege checks */
 static void gen_interrupt(DisasContext *s, int intno,
@@ -2409,22 +2440,29 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask)
 /* Clear BND registers during legacy branches.  */
 static void gen_bnd_jmp(DisasContext *s)
 {
-    /* Do nothing if BND prefix present, MPX is disabled, or if the
-       BNDREGs are known to be in INIT state already.  The helper
-       itself will check BNDPRESERVE at runtime.  */
+    /* Clear the registers only if BND prefix is missing, MPX is enabled,
+       and if the BNDREGs are known to be in use (non-zero) already.
+       The helper itself will check BNDPRESERVE at runtime.  */
     if ((s->prefix & PREFIX_REPNZ) == 0
-        && (s->flags & HF_MPX_EN_MASK) == 0
-        && (s->flags & HF_MPX_IU_MASK) == 0) {
+        && (s->flags & HF_MPX_EN_MASK) != 0
+        && (s->flags & HF_MPX_IU_MASK) != 0) {
         gen_helper_bnd_jmp(cpu_env);
     }
 }
 
-/* generate a generic end of block. Trace exception is also generated
-   if needed */
-static void gen_eob(DisasContext *s)
+/* Generate an end of block. Trace exception is also generated if needed.
+   If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
+static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
 {
     gen_update_cc_op(s);
-    gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
+
+    /* If several instructions disable interrupts, only the first does it.  */
+    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
+        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
+    } else {
+        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
+    }
+
     if (s->tb->flags & HF_RF_MASK) {
         gen_helper_reset_rf(cpu_env);
     }
@@ -2438,6 +2476,12 @@ static void gen_eob(DisasContext *s)
     s->is_jmp = DISAS_TB_JUMP;
 }
 
+/* End of block, resetting the inhibit irq flag.  */
+static void gen_eob(DisasContext *s)
+{
+    gen_eob_inhibit_irq(s, false);
+}
+
 /* generate a jump to eip. No segment change must happen before as a
    direct call to the next block may occur */
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
@@ -2868,7 +2912,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         b1 = 0;
     sse_fn_epp = sse_op_table1[b][b1];
     if (!sse_fn_epp) {
-        goto illegal_op;
+        goto unknown_op;
     }
     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
         is_xmm = 1;
@@ -2887,15 +2931,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     }
     if (s->flags & HF_EM_MASK) {
     illegal_op:
-        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+        gen_illegal_opcode(s);
         return;
     }
-    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
-        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
-            goto illegal_op;
+    if (is_xmm
+        && !(s->flags & HF_OSFXSR_MASK)
+        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
+        goto unknown_op;
+    }
     if (b == 0x0e) {
-        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
-            goto illegal_op;
+        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
+            /* If we were fully decoding this we might use illegal_op.  */
+            goto unknown_op;
+        }
         /* femms */
         gen_helper_emms(cpu_env);
         return;
@@ -2920,8 +2968,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         b |= (b1 << 8);
         switch(b) {
         case 0x0e7: /* movntq */
-            if (mod == 3)
+            if (mod == 3) {
                 goto illegal_op;
+            }
             gen_lea_modrm(env, s, modrm);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
@@ -3247,7 +3296,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x172:
         case 0x173:
             if (b1 >= 2) {
-	        goto illegal_op;
+	        goto unknown_op;
             }
             val = cpu_ldub_code(env, s->pc++);
             if (is_xmm) {
@@ -3266,7 +3315,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
                                        (((modrm >> 3)) & 7)][b1];
             if (!sse_fn_epp) {
-                goto illegal_op;
+                goto unknown_op;
             }
             if (is_xmm) {
                 rm = (modrm & 7) | REX_B(s);
@@ -3490,12 +3539,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             if (b1 >= 2) {
-                goto illegal_op;
+                goto unknown_op;
             }
 
             sse_fn_epp = sse_op_table6[b].op[b1];
             if (!sse_fn_epp) {
-                goto illegal_op;
+                goto unknown_op;
             }
             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                 goto illegal_op;
@@ -3545,7 +3594,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
             }
             if (sse_fn_epp == SSE_SPECIAL) {
-                goto illegal_op;
+                goto unknown_op;
             }
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -3913,12 +3962,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     break;
 
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
 
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
             break;
 
@@ -3930,12 +3979,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             if (b1 >= 2) {
-                goto illegal_op;
+                goto unknown_op;
             }
 
             sse_fn_eppi = sse_op_table7[b].op[b1];
             if (!sse_fn_eppi) {
-                goto illegal_op;
+                goto unknown_op;
             }
             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                 goto illegal_op;
@@ -4137,12 +4186,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 break;
 
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
             break;
 
         default:
-            goto illegal_op;
+        unknown_op:
+            gen_unknown_opcode(env, s);
+            return;
         }
     } else {
         /* generic MMX or SSE operation */
@@ -4218,11 +4269,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         switch(b) {
         case 0x0f: /* 3DNow! data insns */
-            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
-                goto illegal_op;
             val = cpu_ldub_code(env, s->pc++);
             sse_fn_epp = sse_op_table5[val];
             if (!sse_fn_epp) {
+                goto unknown_op;
+            }
+            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
                 goto illegal_op;
             }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4242,7 +4294,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             /* compare insns */
             val = cpu_ldub_code(env, s->pc++);
             if (val >= 8)
-                goto illegal_op;
+                goto unknown_op;
             sse_fn_epp = sse_op_table4[val][b1];
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
@@ -4287,7 +4339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    s->pc = pc_start;
+    s->pc_start = s->pc = pc_start;
     prefixes = 0;
     s->override = -1;
     rex_w = -1;
@@ -4400,7 +4452,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     b = 0x13a;
                     break;
                 default:   /* Reserved for future use.  */
-                    goto illegal_op;
+                    goto unknown_op;
                 }
             }
             s->vex_v = (~vex3 >> 3) & 0xf;
@@ -4750,7 +4802,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
             break;
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
@@ -4763,7 +4815,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
         if (op >= 2 && b == 0xfe) {
-            goto illegal_op;
+            goto unknown_op;
         }
         if (CODE64(s)) {
             if (op == 2 || op == 4) {
@@ -4856,7 +4908,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_push_v(s, cpu_T0);
             break;
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
@@ -5171,16 +5223,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         ot = gen_pop_T0(s);
         gen_movl_seg_T0(s, reg);
         gen_pop_update(s, ot);
-        if (reg == R_SS) {
-            /* if reg == SS, inhibit interrupts/trace. */
-            /* If several instructions disable interrupts, only the
-               _first_ does it */
-            gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
-            s->tf = 0;
-        }
+        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
-            gen_eob(s);
+            if (reg == R_SS) {
+                s->tf = 0;
+                gen_eob_inhibit_irq(s, true);
+            } else {
+                gen_eob(s);
+            }
         }
         break;
     case 0x1a1: /* pop fs */
@@ -5238,16 +5289,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
         gen_movl_seg_T0(s, reg);
-        if (reg == R_SS) {
-            /* if reg == SS, inhibit interrupts/trace */
-            /* If several instructions disable interrupts, only the
-               _first_ does it */
-            gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
-            s->tf = 0;
-        }
+        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
         if (s->is_jmp) {
             gen_jmp_im(s->pc - s->cs_base);
-            gen_eob(s);
+            if (reg == R_SS) {
+                s->tf = 0;
+                gen_eob_inhibit_irq(s, true);
+            } else {
+                gen_eob(s);
+            }
         }
         break;
     case 0x8c: /* mov Gv, seg */
@@ -5727,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_helper_fpop(cpu_env);
                 break;
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
         } else {
             /* register float ops */
@@ -5751,7 +5801,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fwait(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x0c: /* grp d9/4 */
@@ -5770,7 +5820,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fxam_ST0(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x0d: /* grp d9/5 */
@@ -5805,7 +5855,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                         gen_helper_fldz_ST0(cpu_env);
                         break;
                     default:
-                        goto illegal_op;
+                        goto unknown_op;
                     }
                 }
                 break;
@@ -5905,7 +5955,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fpop(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x1c:
@@ -5923,7 +5973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 case 4: /* fsetpm (287 only, just do nop here) */
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x1d: /* fucomi */
@@ -5975,7 +6025,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_helper_fpop(cpu_env);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x38: /* ffreep sti, undocumented op */
@@ -5990,7 +6040,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                     break;
                 default:
-                    goto illegal_op;
+                    goto unknown_op;
                 }
                 break;
             case 0x3d: /* fucomip */
@@ -6036,7 +6086,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
         }
         break;
@@ -6507,7 +6557,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         val = cpu_ldub_code(env, s->pc++);
         tcg_gen_movi_tl(cpu_T1, val);
         if (op < 4)
-            goto illegal_op;
+            goto unknown_op;
         op -= 4;
         goto bt_op;
     case 0x1a3: /* bt Gv, Ev */
@@ -6773,26 +6823,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
         break;
     case 0xfb: /* sti */
-        if (!s->vm86) {
-            if (s->cpl <= s->iopl) {
-            gen_sti:
-                gen_helper_sti(cpu_env);
-                /* interruptions are enabled only the first insn after sti */
-                /* If several instructions disable interrupts, only the
-                   _first_ does it */
-                gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
-                /* give a chance to handle pending irqs */
-                gen_jmp_im(s->pc - s->cs_base);
-                gen_eob(s);
-            } else {
-                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-            }
+        if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
+            gen_helper_sti(cpu_env);
+            /* interruptions are enabled only the first insn after sti */
+            gen_jmp_im(s->pc - s->cs_base);
+            gen_eob_inhibit_irq(s, true);
         } else {
-            if (s->iopl == 3) {
-                goto gen_sti;
-            } else {
-                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
-            }
+            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         }
         break;
     case 0x62: /* bound */
@@ -7031,14 +7068,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             set_cc_op(s, CC_OP_EFLAGS);
             break;
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
     case 0x101:
         modrm = cpu_ldub_code(env, s->pc++);
         switch (modrm) {
-        CASE_MEM_OP(0): /* sgdt */
+        CASE_MODRM_MEM_OP(0): /* sgdt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0,
@@ -7094,7 +7131,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_eob(s);
             break;
 
-        CASE_MEM_OP(1): /* sidt */
+        CASE_MODRM_MEM_OP(1): /* sidt */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
             gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, idt.limit));
@@ -7240,7 +7277,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
             break;
 
-        CASE_MEM_OP(2): /* lgdt */
+        CASE_MODRM_MEM_OP(2): /* lgdt */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
@@ -7257,7 +7294,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, gdt.limit));
             break;
 
-        CASE_MEM_OP(3): /* lidt */
+        CASE_MODRM_MEM_OP(3): /* lidt */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
@@ -7274,17 +7311,19 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             tcg_gen_st32_tl(cpu_T1, cpu_env, offsetof(CPUX86State, idt.limit));
             break;
 
-        CASE_MEM_OP(4): /* smsw */
+        CASE_MODRM_OP(4): /* smsw */
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
-#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]) + 4);
-#else
-            tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
-#endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            tcg_gen_ld_tl(cpu_T0, cpu_env, offsetof(CPUX86State, cr[0]));
+            if (CODE64(s)) {
+                mod = (modrm >> 6) & 3;
+                ot = (mod != 3 ? MO_16 : s->dflag);
+            } else {
+                ot = MO_16;
+            }
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
 
-        CASE_MEM_OP(6): /* lmsw */
+        CASE_MODRM_OP(6): /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
@@ -7296,7 +7335,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_eob(s);
             break;
 
-        CASE_MEM_OP(7): /* invlpg */
+        CASE_MODRM_MEM_OP(7): /* invlpg */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 break;
@@ -7343,7 +7382,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
 
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
@@ -7467,7 +7506,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_nop_modrm(env, s, modrm);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7712,7 +7751,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             default:
-                goto illegal_op;
+                goto unknown_op;
             }
         }
         break;
@@ -7778,7 +7817,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
         switch (modrm) {
-        CASE_MEM_OP(0): /* fxsave */
+        CASE_MODRM_MEM_OP(0): /* fxsave */
             if (!(s->cpuid_features & CPUID_FXSR)
                 || (prefixes & PREFIX_LOCK)) {
                 goto illegal_op;
@@ -7791,7 +7830,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_helper_fxsave(cpu_env, cpu_A0);
             break;
 
-        CASE_MEM_OP(1): /* fxrstor */
+        CASE_MODRM_MEM_OP(1): /* fxrstor */
             if (!(s->cpuid_features & CPUID_FXSR)
                 || (prefixes & PREFIX_LOCK)) {
                 goto illegal_op;
@@ -7804,7 +7843,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_helper_fxrstor(cpu_env, cpu_A0);
             break;
 
-        CASE_MEM_OP(2): /* ldmxcsr */
+        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
                 goto illegal_op;
             }
@@ -7817,7 +7856,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
             break;
 
-        CASE_MEM_OP(3): /* stmxcsr */
+        CASE_MODRM_MEM_OP(3): /* stmxcsr */
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
                 goto illegal_op;
             }
@@ -7830,7 +7869,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
             break;
 
-        CASE_MEM_OP(4): /* xsave */
+        CASE_MODRM_MEM_OP(4): /* xsave */
             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
@@ -7842,7 +7881,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_helper_xsave(cpu_env, cpu_A0, cpu_tmp1_i64);
             break;
 
-        CASE_MEM_OP(5): /* xrstor */
+        CASE_MODRM_MEM_OP(5): /* xrstor */
             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
@@ -7859,7 +7898,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_eob(s);
             break;
 
-        CASE_MEM_OP(6): /* xsaveopt / clwb */
+        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
@@ -7883,7 +7922,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
             break;
 
-        CASE_MEM_OP(7): /* clflush / clflushopt */
+        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
             }
@@ -7934,7 +7973,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 break;
             }
-            goto illegal_op;
+            goto unknown_op;
 
         case 0xf8: /* sfence / pcommit */
             if (prefixes & PREFIX_DATA) {
@@ -7956,7 +7995,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             break;
 
         default:
-            goto illegal_op;
+            goto unknown_op;
         }
         break;
 
@@ -7965,8 +8004,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
-        /* ignore for now */
+        gen_nop_modrm(env, s, modrm);
         break;
     case 0x1aa: /* rsm */
         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
@@ -8013,7 +8051,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         gen_sse(env, s, b, pc_start, rex_r);
         break;
     default:
-        goto illegal_op;
+        goto unknown_op;
     }
     /* lock generation */
     if (s->prefix & PREFIX_LOCK)
@@ -8023,7 +8061,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     if (s->prefix & PREFIX_LOCK)
         gen_helper_unlock();
     /* XXX: ensure that no lock was generated */
-    gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
+    gen_illegal_opcode(s);
+    return s->pc;
+ unknown_op:
+    if (s->prefix & PREFIX_LOCK)
+        gen_helper_unlock();
+    /* XXX: ensure that no lock was generated */
+    gen_unknown_opcode(env, s);
     return s->pc;
 }
 
diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
index 70632314c8..97df69d71c 100644
--- a/tests/qemu-iotests/081.out
+++ b/tests/qemu-iotests/081.out
@@ -31,7 +31,7 @@ QMP_VERSION
 {"return": {}}
 {"return": {}}
 {"return": {}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "QUORUM_REPORT_BAD", "data": {"node-name": "drive2", "sectors-count": 20480, "sector-num": 0, "type": "read"}}
 read 10485760/10485760 bytes at offset 0
 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 {"return": ""}
diff --git a/tests/qemu-iotests/146 b/tests/qemu-iotests/146
new file mode 100755
index 0000000000..043711be68
--- /dev/null
+++ b/tests/qemu-iotests/146
@@ -0,0 +1,165 @@
+#!/bin/bash
+#
+# Test VHD image format creator detection and override
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=jcody@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1    # failure is the default!
+
+_cleanup()
+{
+    _cleanup_qemu
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt vpc
+_supported_proto file
+_supported_os Linux
+
+
+qemu_comm_method="monitor"
+silent=
+
+echo
+echo === Testing VPC Autodetect ===
+echo
+_use_sample_img virtualpc-dynamic.vhd.bz2
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing VPC with current_size force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing VPC with chs force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+_cleanup_test_img
+
+echo
+echo === Testing Hyper-V Autodetect ===
+echo
+_use_sample_img hyperv2012r2-dynamic.vhd.bz2
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing Hyper-V with current_size force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing Hyper-V with chs force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+_cleanup_test_img
+
+echo
+echo === Testing d2v Autodetect ===
+echo
+_use_sample_img d2v-zerofilled.vhd.bz2
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing d2v with current_size force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing d2v with chs force ===
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+_cleanup_test_img
+
+echo
+echo === Testing Image create, default ===
+echo
+
+TEST_IMG="${TEST_DIR}/vpc-create-test.vpc"
+
+_make_test_img 4G
+
+echo
+echo === Read created image, default opts ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=chs ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=current_size ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo
+echo === Testing Image create, force_size ===
+echo
+
+_make_test_img -o force_size 4G
+
+echo
+echo === Read created image, default opts ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=chs ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=chs ${TEST_IMG}" -c 'map'
+
+echo
+echo === Read created image, force_size_calc=current_size ====
+echo
+
+${QEMU_IO} -c "open -o driver=vpc,force_size_calc=current_size ${TEST_IMG}" -c 'map'
+
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/146.out b/tests/qemu-iotests/146.out
new file mode 100644
index 0000000000..4f334d86bc
--- /dev/null
+++ b/tests/qemu-iotests/146.out
@@ -0,0 +1,70 @@
+QA output created by 146
+
+=== Testing VPC Autodetect ===
+
+[                       0]  266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+
+=== Testing VPC with current_size force ===
+
+[                       0]  266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+
+=== Testing VPC with chs force ===
+
+[                       0]  266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Hyper-V Autodetect ===
+
+[                       0]  266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Hyper-V with current_size force ===
+
+[                       0]  266338304/ 266338304 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Hyper-V with chs force ===
+
+[                       0]  266334240/ 266334240 sectors not allocated at offset 0 bytes (0)
+
+=== Testing d2v Autodetect ===
+
+[                       0]   514560/  514560 sectors     allocated at offset 0 bytes (1)
+
+=== Testing d2v with current_size force ===
+
+[                       0]   514560/  514560 sectors     allocated at offset 0 bytes (1)
+
+=== Testing d2v with chs force ===
+
+[                       0]   514560/  514560 sectors     allocated at offset 0 bytes (1)
+
+=== Testing Image create, default ===
+
+Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296
+
+=== Read created image, default opts ====
+
+[                       0]  8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=chs ====
+
+[                       0]  8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=current_size ====
+
+[                       0]  8389584/ 8389584 sectors not allocated at offset 0 bytes (0)
+
+=== Testing Image create, force_size ===
+
+Formatting 'TEST_DIR/IMGFMT-create-test.IMGFMT', fmt=IMGFMT size=4294967296 force_size=on
+
+=== Read created image, default opts ====
+
+[                       0]  8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=chs ====
+
+[                       0]  8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+
+=== Read created image, force_size_calc=current_size ====
+
+[                       0]  8388608/ 8388608 sectors not allocated at offset 0 bytes (0)
+*** done
diff --git a/tests/qemu-iotests/148 b/tests/qemu-iotests/148
new file mode 100644
index 0000000000..30bc37958e
--- /dev/null
+++ b/tests/qemu-iotests/148
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+#
+# Test the rate limit of QMP events
+#
+# Copyright (C) 2016 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+
+imgs = (os.path.join(iotests.test_dir, 'quorum0.img'),
+        os.path.join(iotests.test_dir, 'quorum1.img'),
+        os.path.join(iotests.test_dir, 'quorum2.img'))
+
+img_conf = (os.path.join(iotests.test_dir, 'quorum0.conf'),
+            os.path.join(iotests.test_dir, 'quorum1.conf'),
+            os.path.join(iotests.test_dir, 'quorum2.conf'))
+
+event_rate = 1000000000
+sector_size = 512
+offset = 10
+
+class TestQuorumEvents(iotests.QMPTestCase):
+
+    def create_blkdebug_file(self, blkdebug_file, bad_sector):
+        file = open(blkdebug_file, 'w')
+        file.write('''
+[inject-error]
+event = "read_aio"
+errno = "5"
+sector = "%d"
+''' % bad_sector)
+        file.close()
+
+    def setUp(self):
+        driveopts = ['driver=quorum', 'vote-threshold=2']
+        for i in range(len(imgs)):
+            iotests.qemu_img('create', '-f', iotests.imgfmt, imgs[i], '1M')
+            self.create_blkdebug_file(img_conf[i], i + offset)
+            driveopts.append('children.%d.driver=%s' % (i, iotests.imgfmt))
+            driveopts.append('children.%d.file.driver=blkdebug' % i)
+            driveopts.append('children.%d.file.config=%s' % (i, img_conf[i]))
+            driveopts.append('children.%d.file.image.filename=%s' % (i, imgs[i]))
+            driveopts.append('children.%d.node-name=img%d' % (i, i))
+        self.vm = iotests.VM()
+        self.vm.add_drive(None, opts = ','.join(driveopts))
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for i in range(len(imgs)):
+            os.remove(imgs[i])
+            os.remove(img_conf[i])
+
+    def do_check_event(self, node, sector = 0):
+        if node == None:
+            self.assertEqual(self.vm.get_qmp_event(), None)
+            return
+
+        for event in self.vm.get_qmp_events(wait=True):
+            if event['event'] == 'QUORUM_REPORT_BAD':
+                self.assert_qmp(event, 'data/node-name', node)
+                self.assert_qmp(event, 'data/sector-num', sector)
+
+    def testQuorum(self):
+        if not 'quorum' in iotests.qemu_img_pipe('--help'):
+            return
+
+        # Generate an error and get an event
+        self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+                            (offset * sector_size, sector_size))
+        self.vm.qtest("clock_step 10")
+        self.do_check_event('img0', offset)
+
+        # I/O errors in the same child: only one event is emitted
+        delay = 10
+        for i in range(3):
+            self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+                                (offset * sector_size, sector_size))
+            self.vm.qtest("clock_step %d" % delay)
+            self.do_check_event(None)
+
+        # Wait enough so the event is finally emitted
+        self.vm.qtest("clock_step %d" % (2 * event_rate))
+        self.do_check_event('img0', offset)
+
+        # I/O errors in the same child: all events are emitted
+        delay = 2 * event_rate
+        for i in range(3):
+            self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+                                (offset * sector_size, sector_size))
+            self.vm.qtest("clock_step %d" % delay)
+            self.do_check_event('img0', offset)
+
+        # I/O errors in different children: all events are emitted
+        delay = 10
+        for i in range(len(imgs)):
+            self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+                                ((offset + i) * sector_size, sector_size))
+            self.vm.qtest("clock_step %d" % delay)
+            self.do_check_event('img%d' % i, offset + i)
+
+        # I/O errors in different children: all events are emitted
+        delay = 2 * event_rate
+        for i in range(len(imgs)):
+            self.vm.hmp_qemu_io("drive0", "aio_read %d %d" %
+                                ((offset + i) * sector_size, sector_size))
+            self.vm.qtest("clock_step %d" % delay)
+            self.do_check_event('img%d' % i, offset + i)
+
+        # No more pending events
+        self.do_check_event(None)
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=["raw"])
diff --git a/tests/qemu-iotests/148.out b/tests/qemu-iotests/148.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/148.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 47fd40c546..faf0f21397 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -148,3 +148,5 @@
 143 auto quick
 144 rw auto quick
 145 auto quick
+146 auto quick
+148 rw auto quick
diff --git a/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2 b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2
new file mode 100644
index 0000000000..f12cb9203a
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/d2v-zerofilled.vhd.bz2
Binary files differdiff --git a/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2
new file mode 100644
index 0000000000..bfeccf7b9f
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/hyperv2012r2-dynamic.vhd.bz2
Binary files differdiff --git a/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2 b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2
new file mode 100644
index 0000000000..783be3c8f0
--- /dev/null
+++ b/tests/qemu-iotests/sample_images/virtualpc-dynamic.vhd.bz2
Binary files differ